[ROCm OSS Enablement] Update setup.py to account for targets and variants #4023

Closed · wants to merge 1 commit
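This PR replaces the single --package_variant flag in setup.py, the CI scripts, and the build docs with two flags: --build-variant selects the toolchain variant (cpu, docs, cuda, rocm), while --build-target selects the build target (e.g. genai) on top of it. The install and test scripts now read fbgemm_gpu.__target__ alongside fbgemm_gpu.__variant__ instead of inferring everything from a single variant string. As a rough sketch of a GenAI ROCm wheel build under the new flags (drawn from the docs added below; the ROCm path and GPU architecture list are placeholders, not values prescribed by this PR):

    # Sketch only: substitute your actual ROCm install path and target architectures
    export ROCM_PATH=/opt/rocm
    export PYTORCH_ROCM_ARCH="gfx90a,gfx942"
    python setup.py bdist_wheel \
        --build-target=genai \
        --build-variant=rocm \
        -DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
        -DHIP_ROOT_DIR="${ROCM_PATH}"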
15 changes: 7 additions & 8 deletions .github/scripts/fbgemm_gpu_build.bash
@@ -141,15 +141,15 @@ __configure_fbgemm_gpu_build_cpu () {
# Update the package name and build args depending on if CUDA is specified
echo "[BUILD] Setting CPU-only build args ..."
build_args=(
--package_variant=cpu
--build-variant=cpu
)
}

__configure_fbgemm_gpu_build_docs () {
# Update the package name and build args depending on if CUDA is specified
echo "[BUILD] Setting CPU-only (docs) build args ..."
build_args=(
--package_variant=docs
--build-variant=docs
)
}

@@ -206,7 +206,7 @@ __configure_fbgemm_gpu_build_rocm () {
# https://rocm.docs.amd.com/en/docs-6.1.1/reference/rocmcc.html
echo "[BUILD] Setting ROCm build args ..."
build_args=(
--package_variant=rocm
--build-variant=rocm
# HIP_ROOT_DIR now required for HIP to be correctly detected by CMake
-DHIP_ROOT_DIR=/opt/rocm
# ROCm CMake complains about missing AMDGPU_TARGETS, so we explicitly set this
@@ -284,7 +284,7 @@ __configure_fbgemm_gpu_build_cuda () {

echo "[BUILD] Setting CUDA build args ..."
build_args=(
--package_variant=cuda
--build-variant=cuda
--nvml_lib_path="${nvml_lib_path}"
--nccl_lib_path="${nccl_lib_path}"
# Pass to PyTorch CMake
@@ -303,10 +303,9 @@ __configure_fbgemm_gpu_build_genai () {

__configure_fbgemm_gpu_build_cuda "$fbgemm_variant_targets" || return 1

# Replace the package_variant flag, since GenAI is also a CUDA-type build
for i in "${!build_args[@]}"; do
build_args[i]="${build_args[i]/--package_variant=cuda/--package_variant=genai}"
done
build_args+=(
--build-target=genai
)
}

# shellcheck disable=SC2120
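With this change, the GenAI configuration above no longer rewrites --package_variant; it reuses the CUDA variant arguments and appends an explicit target. A minimal sketch of what build_args ends up holding after __configure_fbgemm_gpu_build_genai runs (library paths are placeholders, and other CMake arguments from the CUDA configuration are omitted):

    # Sketch of the composed argument list (CUDA variant + GenAI target)
    build_args=(
        --build-variant=cuda
        --nvml_lib_path=/path/to/libnvidia-ml.so    # placeholder
        --nccl_lib_path=/path/to/libnccl.so         # placeholder
        --build-target=genai
    )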
17 changes: 10 additions & 7 deletions .github/scripts/fbgemm_gpu_install.bash
@@ -39,12 +39,15 @@ __install_fetch_version_and_variant_info () {

echo "[CHECK] Printing out the FBGEMM-GPU version ..."
# shellcheck disable=SC2086,SC2155
installed_fbgemm_gpu_version=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)")
installed_fbgemm_target=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__target__)")
# shellcheck disable=SC2086,SC2155
installed_fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
installed_fbgemm_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
# shellcheck disable=SC2086,SC2155
installed_fbgemm_version=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)")
echo "################################################################################"
echo "[CHECK] The installed VERSION of FBGEMM_GPU is: ${installed_fbgemm_gpu_version}"
echo "[CHECK] The installed VARIANT of FBGEMM_GPU is: ${installed_fbgemm_gpu_variant}"
echo "[CHECK] The installed FBGEMM TARGET is: ${installed_fbgemm_target}"
echo "[CHECK] The installed FBGEMM VARIANT is: ${installed_fbgemm_variant}"
echo "[CHECK] The installed FBGEMM VERSION is: ${installed_fbgemm_version}"
echo "################################################################################"
echo ""
}
@@ -53,7 +56,7 @@ __install_check_subpackages () {
# shellcheck disable=SC2086,SC2155
local fbgemm_gpu_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(dir(fbgemm_gpu))")

if [ "$installed_fbgemm_gpu_variant" == "cuda" ] || [ "$installed_fbgemm_gpu_variant" == "genai" ]; then
if [ "$installed_fbgemm_target" == "genai" ]; then
# shellcheck disable=SC2086,SC2155
local experimental_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu.experimental; print(dir(fbgemm_gpu.experimental))")
fi
@@ -74,7 +77,7 @@ __install_check_subpackages () {
"fbgemm_gpu.tbe.cache"
)

if [ "$installed_fbgemm_gpu_variant" != "genai" ]; then
if [ "$installed_fbgemm_target" != "genai" ]; then
subpackages+=(
"fbgemm_gpu.split_embedding_codegen_lookup_invokers"
"fbgemm_gpu.tbe.ssd"
@@ -89,7 +92,7 @@

__install_check_operator_registrations () {
echo "[INSTALL] Check for operator registrations ..."
if [ "$installed_fbgemm_gpu_variant" == "genai" ]; then
if [ "$installed_fbgemm_target" == "genai" ]; then
local test_operators=(
"torch.ops.fbgemm.nccl_init"
"torch.ops.fbgemm.gqa_attn_splitk"
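The install checks above key off the new fbgemm_gpu.__target__ attribute in addition to __variant__ and __version__. A quick manual spot-check along the same lines might look like this (sketch; the environment name build_env is an assumption):

    # Sketch: print the installed FBGEMM target, variant, and version from a Conda env
    conda run -n build_env python -c \
        "import fbgemm_gpu; print(fbgemm_gpu.__target__, fbgemm_gpu.__variant__, fbgemm_gpu.__version__)"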
50 changes: 20 additions & 30 deletions .github/scripts/fbgemm_gpu_test.bash
@@ -154,16 +154,16 @@ __setup_fbgemm_gpu_test () {

# Configure the environment for ignored test suites for each FBGEMM_GPU
# variant
if [ "$fbgemm_gpu_variant" == "cpu" ]; then
if [ "$fbgemm_build_variant" == "cpu" ]; then
echo "[TEST] Configuring for CPU-based testing ..."
__configure_fbgemm_gpu_test_cpu

elif [ "$fbgemm_gpu_variant" == "rocm" ]; then
elif [ "$fbgemm_build_variant" == "rocm" ]; then
echo "[TEST] Configuring for ROCm-based testing ..."
__configure_fbgemm_gpu_test_rocm

else
echo "[TEST] FBGEMM_GPU variant is ${fbgemm_gpu_variant}; configuring for CUDA-based testing ..."
echo "[TEST] FBGEMM_GPU variant is ${fbgemm_build_variant}; configuring for CUDA-based testing ..."
__configure_fbgemm_gpu_test_cuda
fi

@@ -189,9 +189,6 @@ __setup_fbgemm_gpu_test () {

echo "[TEST] Checking imports ..."
(test_python_import_package "${env_name}" fbgemm_gpu) || return 1
if [ "$fbgemm_gpu_variant" != "genai" ]; then
(test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
fi

# Set the feature flags to enable experimental features as needed
__set_feature_flags
@@ -251,21 +248,19 @@ __run_fbgemm_gpu_tests_in_directory () {
__determine_test_directories () {
target_directories=()

if [ "$fbgemm_gpu_variant" != "genai" ]; then
target_directories+=(
fbgemm_gpu/test
)
fi

if [ "$fbgemm_gpu_variant" == "genai" ]; then
if [ "$fbgemm_build_target" == "genai" ]; then
target_directories+=(
fbgemm_gpu/experimental/example/test
fbgemm_gpu/experimental/gemm/test
fbgemm_gpu/experimental/gen_ai/test
)
else
target_directories+=(
fbgemm_gpu/test
)
fi

echo "[TEST] Determined the testing directories:"
echo "[TEST] Determined the test directories:"
for test_dir in "${target_directories[@]}"; do
echo "$test_dir"
done
@@ -274,14 +269,10 @@ __determine_test_directories () {

test_all_fbgemm_gpu_modules () {
env_name="$1"
fbgemm_gpu_variant="$2"
if [ "$env_name" == "" ]; then
echo "Usage: ${FUNCNAME[0]} ENV_NAME [FBGEMM_GPU_VARIANT]"
echo "Usage: ${FUNCNAME[0]} ENV_NAME"
echo "Example(s):"
echo " ${FUNCNAME[0]} build_env # Test all FBGEMM_GPU modules applicable to to the installed variant"
echo " ${FUNCNAME[0]} build_env cpu # Test all FBGEMM_GPU modules applicable to CPU"
echo " ${FUNCNAME[0]} build_env cuda # Test all FBGEMM_GPU modules applicable to CUDA"
echo " ${FUNCNAME[0]} build_env rocm # Test all FBGEMM_GPU modules applicable to ROCm"
return 1
else
echo "################################################################################"
@@ -295,14 +286,13 @@ test_all_fbgemm_gpu_modules () {
# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# Determine the FBGEMM_GPU varaiant if needed
if [ "$fbgemm_gpu_variant" == "" ]; then
echo "[TEST] FBGEMM_GPU variant not explicitly provided by user; will automatically determine from the FBGEMM_GPU installation ..."
# shellcheck disable=SC2086
fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
echo "[TEST] Determined FBGEMM_GPU variant from installation: ${fbgemm_gpu_variant}"
echo "[TEST] Will be running tests specific to this variant ..."
fi
# Determine the FBGEMM build target and variant
# shellcheck disable=SC2086
fbgemm_build_target=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__target__)")
# shellcheck disable=SC2086
fbgemm_build_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
echo "[TEST] Determined FBGEMM_GPU (target : variant) from installation: (${fbgemm_build_target} : ${fbgemm_build_variant})"
echo "[TEST] Will be running tests specific to this target and variant ..."

# Determine the test directories to include for testing
__determine_test_directories
@@ -312,9 +302,9 @@

# Iterate through the test directories and run bulk tests
for test_dir in "${target_directories[@]}"; do
cd "${test_dir}" || return 1
__run_fbgemm_gpu_tests_in_directory "${env_name}" "${fbgemm_gpu_variant}" || return 1
cd - || return 1
cd "${test_dir}" || return 1
__run_fbgemm_gpu_tests_in_directory "${env_name}" || return 1
cd - || return 1
done
}

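Since the explicit variant argument is gone from test_all_fbgemm_gpu_modules, callers now pass only the environment name and the function derives the target and variant from the installed package. A sketch of the updated invocation (assumes the CI helper scripts have already been sourced and build_env is the Conda environment name):

    # Sketch: run all test modules applicable to the installed target/variant
    test_all_fbgemm_gpu_modules build_env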
2 changes: 1 addition & 1 deletion .github/scripts/nova_postscript.bash
@@ -66,7 +66,7 @@ fi

$CONDA_RUN python3 -c "import torch; print('cuda.is_available() ', torch.cuda.is_available()); print ('device_count() ',torch.cuda.device_count());"
cd "${FBGEMM_REPO}" || { echo "[NOVA] Failed to cd to ${FBGEMM_REPO} from $(pwd)"; };
test_all_fbgemm_gpu_modules "${BUILD_ENV_NAME}" "${fbgemm_variant}"
test_all_fbgemm_gpu_modules "${BUILD_ENV_NAME}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
51 changes: 49 additions & 2 deletions fbgemm_gpu/docs/src/fbgemm_genai/development/BuildInstructions.rst
@@ -147,7 +147,8 @@ toolchains have been properly installed.

# Build the wheel artifact only
python setup.py bdist_wheel \
--package_variant=genai \
--build-target=genai \
--build-variant=cuda \
--python-tag="${python_tag}" \
--plat-name="${python_plat_name}" \
--nvml_lib_path=${NVML_LIB_PATH} \
@@ -156,11 +157,57 @@ toolchains have been properly installed.

# Build and install the library into the Conda environment
python setup.py install \
--package_variant=genai \
--build-target=genai \
--build-variant=cuda \
--nvml_lib_path=${NVML_LIB_PATH} \
--nccl_lib_path=${NCCL_LIB_PATH} \
-DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"

.. _fbgemm-gpu.build.process.rocm:

ROCm Build
----------

For ROCm builds, ``ROCM_PATH`` and ``PYTORCH_ROCM_ARCH`` need to be specified.
The presence of a ROCm device, however, is not required for building
the package.

Similar to CUDA builds, building with Clang + ``libstdc++`` can be enabled by
appending ``--cxxprefix=$CONDA_PREFIX`` to the build command, presuming the
toolchains have been properly installed.

.. code:: sh

# !! Run in fbgemm_gpu/ directory inside the Conda environment !!

export ROCM_PATH=/path/to/rocm

# [OPTIONAL] Enable verbose HIPCC logs
export HIPCC_VERBOSE=1

# Build for the target architecture of the ROCm device installed on the machine (e.g. 'gfx908,gfx90a,gfx942')
# See https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html for list
export PYTORCH_ROCM_ARCH=$(${ROCM_PATH}/bin/rocminfo | grep -o -m 1 'gfx.*')

# Build the wheel artifact only
python setup.py bdist_wheel \
--build-target=genai \
--build-variant=rocm \
--python-tag="${python_tag}" \
--plat-name="${python_plat_name}" \
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
-DHIP_ROOT_DIR="${ROCM_PATH}" \
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"

# Build and install the library into the Conda environment
python setup.py install \
--build-target=genai \
--build-variant=rocm \
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
-DHIP_ROOT_DIR="${ROCM_PATH}" \
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"

Post-Build Checks (For Developers)
----------------------------------
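The ROCm section added above notes that Clang + libstdc++ builds are enabled by appending --cxxprefix=$CONDA_PREFIX to the build command. A sketch of the wheel build from that section with the flag appended (assumes python_tag, python_plat_name, ROCM_PATH, and PYTORCH_ROCM_ARCH are set as elsewhere in these docs):

    python setup.py bdist_wheel \
        --build-target=genai \
        --build-variant=rocm \
        --python-tag="${python_tag}" \
        --plat-name="${python_plat_name}" \
        -DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
        -DHIP_ROOT_DIR="${ROCM_PATH}" \
        -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
        -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" \
        --cxxprefix=$CONDA_PREFIX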
18 changes: 9 additions & 9 deletions fbgemm_gpu/docs/src/fbgemm_gpu/development/BuildInstructions.rst
@@ -542,16 +542,16 @@ For CPU-only builds, the ``--cpu_only`` flag needs to be specified.

# Build the wheel artifact only
python setup.py bdist_wheel \
--package_variant=cpu \
--build-variant=cpu \
--python-tag="${python_tag}" \
--plat-name="${python_plat_name}"

# Build and install the library into the Conda environment (GCC)
python setup.py install \
--package_variant=cpu
--build-variant=cpu

# NOTE: To build the package as part of generating the documentation, use
# `--package_variant=docs` flag instead!
# `--build-variant=docs` flag instead!

To build using Clang + ``libstdc++`` instead of GCC, simply append the
``--cxxprefix`` flag:
Expand All @@ -562,14 +562,14 @@ To build using Clang + ``libstdc++`` instead of GCC, simply append the

# Build the wheel artifact only
python setup.py bdist_wheel \
--package_variant=cpu \
--build-variant=cpu \
--python-tag="${python_tag}" \
--plat-name="${python_plat_name}" \
--cxxprefix=$CONDA_PREFIX

# Build and install the library into the Conda environment (Clang)
python setup.py install \
--package_variant=cpu
--build-variant=cpu
--cxxprefix=$CONDA_PREFIX

Note that this presumes the Clang toolchain is properly installed along with the
@@ -642,7 +642,7 @@ toolchains have been properly installed.

# Build the wheel artifact only
python setup.py bdist_wheel \
--package_variant=cuda \
--build-variant=cuda \
--python-tag="${python_tag}" \
--plat-name="${python_plat_name}" \
--nvml_lib_path=${NVML_LIB_PATH} \
@@ -651,7 +651,7 @@ toolchains have been properly installed.

# Build and install the library into the Conda environment
python setup.py install \
--package_variant=cuda \
--build-variant=cuda \
--nvml_lib_path=${NVML_LIB_PATH} \
--nccl_lib_path=${NCCL_LIB_PATH} \
-DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"
@@ -684,7 +684,7 @@ presuming the toolchains have been properly installed.

# Build the wheel artifact only
python setup.py bdist_wheel \
--package_variant=rocm \
--build-variant=rocm \
--python-tag="${python_tag}" \
--plat-name="${python_plat_name}" \
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
@@ -694,7 +694,7 @@ presuming the toolchains have been properly installed.

# Build and install the library into the Conda environment
python setup.py install \
--package_variant=rocm \
--build-variant=rocm \
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
-DHIP_ROOT_DIR="${ROCM_PATH}" \
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \