Skip to content

Commit fac3541

Browse files
committed
[fbgemm_gpu] Refactor setup.py to accommodate build targets and variants
- Refactor setup.py to accommodate build targets and variants - Update docs to include building GenAI package for ROCm
1 parent 0f00a8a commit fac3541

File tree

6 files changed

+159
-105
lines changed

6 files changed

+159
-105
lines changed

.github/scripts/fbgemm_gpu_build.bash

+7-8
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,15 @@ __configure_fbgemm_gpu_build_cpu () {
141141
# Update the package name and build args depending on if CUDA is specified
142142
echo "[BUILD] Setting CPU-only build args ..."
143143
build_args=(
144-
--package_variant=cpu
144+
--build-variant=cpu
145145
)
146146
}
147147

148148
__configure_fbgemm_gpu_build_docs () {
149149
# Update the package name and build args depending on if CUDA is specified
150150
echo "[BUILD] Setting CPU-only (docs) build args ..."
151151
build_args=(
152-
--package_variant=docs
152+
--build-variant=docs
153153
)
154154
}
155155

@@ -206,7 +206,7 @@ __configure_fbgemm_gpu_build_rocm () {
206206
# https://rocm.docs.amd.com/en/docs-6.1.1/reference/rocmcc.html
207207
echo "[BUILD] Setting ROCm build args ..."
208208
build_args=(
209-
--package_variant=rocm
209+
--build-variant=rocm
210210
# HIP_ROOT_DIR now required for HIP to be correctly detected by CMake
211211
-DHIP_ROOT_DIR=/opt/rocm
212212
# ROCm CMake complains about missing AMDGPU_TARGETS, so we explicitly set this
@@ -284,7 +284,7 @@ __configure_fbgemm_gpu_build_cuda () {
284284

285285
echo "[BUILD] Setting CUDA build args ..."
286286
build_args=(
287-
--package_variant=cuda
287+
--build-variant=cuda
288288
--nvml_lib_path="${nvml_lib_path}"
289289
--nccl_lib_path="${nccl_lib_path}"
290290
# Pass to PyTorch CMake
@@ -303,10 +303,9 @@ __configure_fbgemm_gpu_build_genai () {
303303

304304
__configure_fbgemm_gpu_build_cuda "$fbgemm_variant_targets" || return 1
305305

306-
# Replace the package_variant flag, since GenAI is also a CUDA-type build
307-
for i in "${!build_args[@]}"; do
308-
build_args[i]="${build_args[i]/--package_variant=cuda/--package_variant=genai}"
309-
done
306+
build_args+=(
307+
--build-target=genai
308+
)
310309
}
311310

312311
# shellcheck disable=SC2120

.github/scripts/fbgemm_gpu_install.bash

+10-7
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,15 @@ __install_fetch_version_and_variant_info () {
3939

4040
echo "[CHECK] Printing out the FBGEMM-GPU version ..."
4141
# shellcheck disable=SC2086,SC2155
42-
installed_fbgemm_gpu_version=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)")
42+
installed_fbgemm_target=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__target__)")
4343
# shellcheck disable=SC2086,SC2155
44-
installed_fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
44+
installed_fbgemm_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
45+
# shellcheck disable=SC2086,SC2155
46+
installed_fbgemm_version=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)")
4547
echo "################################################################################"
46-
echo "[CHECK] The installed VERSION of FBGEMM_GPU is: ${installed_fbgemm_gpu_version}"
47-
echo "[CHECK] The installed VARIANT of FBGEMM_GPU is: ${installed_fbgemm_gpu_variant}"
48+
echo "[CHECK] The installed FBGEMM TARGET is: ${installed_fbgemm_target}"
49+
echo "[CHECK] The installed FBGEMM VARIANT is: ${installed_fbgemm_variant}"
50+
echo "[CHECK] The installed FBGEMM VERSION is: ${installed_fbgemm_version}"
4851
echo "################################################################################"
4952
echo ""
5053
}
@@ -53,7 +56,7 @@ __install_check_subpackages () {
5356
# shellcheck disable=SC2086,SC2155
5457
local fbgemm_gpu_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(dir(fbgemm_gpu))")
5558

56-
if [ "$installed_fbgemm_gpu_variant" == "cuda" ] || [ "$installed_fbgemm_gpu_variant" == "genai" ]; then
59+
if [ "$installed_fbgemm_target" == "genai" ]; then
5760
# shellcheck disable=SC2086,SC2155
5861
local experimental_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu.experimental; print(dir(fbgemm_gpu.experimental))")
5962
fi
@@ -74,7 +77,7 @@ __install_check_subpackages () {
7477
"fbgemm_gpu.tbe.cache"
7578
)
7679

77-
if [ "$installed_fbgemm_gpu_variant" != "genai" ]; then
80+
if [ "$installed_fbgemm_target" != "genai" ]; then
7881
subpackages+=(
7982
"fbgemm_gpu.split_embedding_codegen_lookup_invokers"
8083
"fbgemm_gpu.tbe.ssd"
@@ -89,7 +92,7 @@ __install_check_subpackages () {
8992

9093
__install_check_operator_registrations () {
9194
echo "[INSTALL] Check for operator registrations ..."
92-
if [ "$installed_fbgemm_gpu_variant" == "genai" ]; then
95+
if [ "$installed_fbgemm_target" == "genai" ]; then
9396
local test_operators=(
9497
"torch.ops.fbgemm.nccl_init"
9598
"torch.ops.fbgemm.gqa_attn_splitk"

fbgemm_gpu/docs/src/fbgemm_genai/development/BuildInstructions.rst

+49-2
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ toolchains have been properly installed.
147147
148148
# Build the wheel artifact only
149149
python setup.py bdist_wheel \
150-
--package_variant=genai \
150+
--build-target=genai \
151+
--build-variant=cuda \
151152
--python-tag="${python_tag}" \
152153
--plat-name="${python_plat_name}" \
153154
--nvml_lib_path=${NVML_LIB_PATH} \
@@ -156,11 +157,57 @@ toolchains have been properly installed.
156157
157158
# Build and install the library into the Conda environment
158159
python setup.py install \
159-
--package_variant=genai \
160+
--build-target=genai \
161+
--build-variant=cuda \
160162
--nvml_lib_path=${NVML_LIB_PATH} \
161163
--nccl_lib_path=${NCCL_LIB_PATH} \
162164
-DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"
163165
166+
.. _fbgemm-gpu.build.process.rocm:
167+
168+
ROCm Build
169+
----------
170+
171+
For ROCm builds, ``ROCM_PATH`` and ``PYTORCH_ROCM_ARCH`` need to be specified.
172+
The presence of a ROCm device, however, is not required for building
173+
the package.
174+
175+
Similar to CUDA builds, building with Clang + ``libstdc++`` can be enabled by
176+
appending ``--cxxprefix=$CONDA_PREFIX`` to the build command, presuming the
177+
toolchains have been properly installed.
178+
179+
.. code:: sh
180+
181+
# !! Run in fbgemm_gpu/ directory inside the Conda environment !!
182+
183+
export ROCM_PATH=/path/to/rocm
184+
185+
# [OPTIONAL] Enable verbose HIPCC logs
186+
export HIPCC_VERBOSE=1
187+
188+
# Build for the target architecture of the ROCm device installed on the machine (e.g. 'gfx908,gfx90a,gfx942')
189+
# See https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html for list
190+
export PYTORCH_ROCM_ARCH=$(${ROCM_PATH}/bin/rocminfo | grep -o -m 1 'gfx.*')
191+
192+
# Build the wheel artifact only
193+
python setup.py bdist_wheel \
194+
--build-target=genai \
195+
--build-variant=rocm \
196+
--python-tag="${python_tag}" \
197+
--plat-name="${python_plat_name}" \
198+
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
199+
-DHIP_ROOT_DIR="${ROCM_PATH}" \
200+
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
201+
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
202+
203+
# Build and install the library into the Conda environment
204+
python setup.py install \
205+
--build-target=genai \
206+
--build-variant=rocm \
207+
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
208+
-DHIP_ROOT_DIR="${ROCM_PATH}" \
209+
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
210+
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
164211
165212
Post-Build Checks (For Developers)
166213
----------------------------------

fbgemm_gpu/docs/src/fbgemm_gpu/development/BuildInstructions.rst

+9-9
Original file line numberDiff line numberDiff line change
@@ -542,16 +542,16 @@ For CPU-only builds, the ``--cpu_only`` flag needs to be specified.
542542
543543
# Build the wheel artifact only
544544
python setup.py bdist_wheel \
545-
--package_variant=cpu \
545+
--build-variant=cpu \
546546
--python-tag="${python_tag}" \
547547
--plat-name="${python_plat_name}"
548548
549549
# Build and install the library into the Conda environment (GCC)
550550
python setup.py install \
551-
--package_variant=cpu
551+
--build-variant=cpu
552552
553553
# NOTE: To build the package as part of generating the documentation, use
554-
# `--package_variant=docs` flag instead!
554+
# `--build-variant=docs` flag instead!
555555
556556
To build using Clang + ``libstdc++`` instead of GCC, simply append the
557557
``--cxxprefix`` flag:
@@ -562,14 +562,14 @@ To build using Clang + ``libstdc++`` instead of GCC, simply append the
562562
563563
# Build the wheel artifact only
564564
python setup.py bdist_wheel \
565-
--package_variant=cpu \
565+
--build-variant=cpu \
566566
--python-tag="${python_tag}" \
567567
--plat-name="${python_plat_name}" \
568568
--cxxprefix=$CONDA_PREFIX
569569
570570
# Build and install the library into the Conda environment (Clang)
571571
python setup.py install \
572-
--package_variant=cpu
572+
--build-variant=cpu
573573
--cxxprefix=$CONDA_PREFIX
574574
575575
Note that this presumes the Clang toolchain is properly installed along with the
@@ -642,7 +642,7 @@ toolchains have been properly installed.
642642
643643
# Build the wheel artifact only
644644
python setup.py bdist_wheel \
645-
--package_variant=cuda \
645+
--build-variant=cuda \
646646
--python-tag="${python_tag}" \
647647
--plat-name="${python_plat_name}" \
648648
--nvml_lib_path=${NVML_LIB_PATH} \
@@ -651,7 +651,7 @@ toolchains have been properly installed.
651651
652652
# Build and install the library into the Conda environment
653653
python setup.py install \
654-
--package_variant=cuda \
654+
--build-variant=cuda \
655655
--nvml_lib_path=${NVML_LIB_PATH} \
656656
--nccl_lib_path=${NCCL_LIB_PATH} \
657657
-DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"
@@ -684,7 +684,7 @@ presuming the toolchains have been properly installed.
684684
685685
# Build the wheel artifact only
686686
python setup.py bdist_wheel \
687-
--package_variant=rocm \
687+
--build-variant=rocm \
688688
--python-tag="${python_tag}" \
689689
--plat-name="${python_plat_name}" \
690690
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
@@ -694,7 +694,7 @@ presuming the toolchains have been properly installed.
694694
695695
# Build and install the library into the Conda environment
696696
python setup.py install \
697-
--package_variant=rocm \
697+
--build-variant=rocm \
698698
-DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \
699699
-DHIP_ROOT_DIR="${ROCM_PATH}" \
700700
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \

fbgemm_gpu/fbgemm_gpu/__init__.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ def _load_library(filename: str, no_throw: bool = False) -> None:
3131

3232
try:
3333
# Export the version string from the version file auto-generated by setup.py
34-
from fbgemm_gpu.docs.version import __variant__, __version__ # noqa: F401, E402
34+
from fbgemm_gpu.docs.version import __target__, __variant__, __version__ # noqa: F401, E402
3535
except Exception:
3636
__variant__: str = "INTERNAL"
3737
__version__: str = "INTERNAL"
38+
__target__: str = "INTERNAL"
3839

3940
fbgemm_gpu_libraries = [
4041
"fbgemm_gpu_config",
@@ -52,7 +53,7 @@ def _load_library(filename: str, no_throw: bool = False) -> None:
5253
"fbgemm_gpu_py",
5354
]
5455

55-
fbgemm_gpu_genai_libraries = [
56+
fbgemm_genai_libraries = [
5657
"experimental/gen_ai/fbgemm_gpu_experimental_gen_ai",
5758
]
5859

@@ -64,17 +65,14 @@ def _load_library(filename: str, no_throw: bool = False) -> None:
6465
# .SO file for the ROCm case, so that clients can import
6566
# fbgemm_gpu.experimental.gemm without triggering an error.
6667
if torch.cuda.is_available() and torch.version.hip:
67-
fbgemm_gpu_genai_libraries = []
68+
fbgemm_genai_libraries = []
6869

6970
libraries_to_load = {
70-
"cpu": fbgemm_gpu_libraries,
71-
"docs": fbgemm_gpu_libraries,
72-
"cuda": fbgemm_gpu_libraries,
73-
"genai": fbgemm_gpu_genai_libraries,
74-
"rocm": fbgemm_gpu_libraries,
71+
"default": fbgemm_gpu_libraries,
72+
"genai": fbgemm_genai_libraries,
7573
}
7674

77-
for library in libraries_to_load.get(__variant__, []):
75+
for library in libraries_to_load.get(__target__, []):
7876
# NOTE: In all cases, we want to throw an error if we cannot load the
7977
# library. However, this appears to break the OSS documentation build,
8078
# where the Python documentation doesn't show up in the generated docs.

0 commit comments

Comments
 (0)