diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash index 7c558c72c6..27d79c515f 100644 --- a/.github/scripts/fbgemm_gpu_build.bash +++ b/.github/scripts/fbgemm_gpu_build.bash @@ -141,7 +141,7 @@ __configure_fbgemm_gpu_build_cpu () { # Update the package name and build args depending on if CUDA is specified echo "[BUILD] Setting CPU-only build args ..." build_args=( - --package_variant=cpu + --build-variant=cpu ) } @@ -149,7 +149,7 @@ __configure_fbgemm_gpu_build_docs () { # Update the package name and build args depending on if CUDA is specified echo "[BUILD] Setting CPU-only (docs) build args ..." build_args=( - --package_variant=docs + --build-variant=docs ) } @@ -206,7 +206,7 @@ __configure_fbgemm_gpu_build_rocm () { # https://rocm.docs.amd.com/en/docs-6.1.1/reference/rocmcc.html echo "[BUILD] Setting ROCm build args ..." build_args=( - --package_variant=rocm + --build-variant=rocm # HIP_ROOT_DIR now required for HIP to be correctly detected by CMake -DHIP_ROOT_DIR=/opt/rocm # ROCm CMake complains about missing AMDGPU_TARGETS, so we explicitly set this @@ -284,7 +284,7 @@ __configure_fbgemm_gpu_build_cuda () { echo "[BUILD] Setting CUDA build args ..." 
build_args=( - --package_variant=cuda + --build-variant=cuda --nvml_lib_path="${nvml_lib_path}" --nccl_lib_path="${nccl_lib_path}" # Pass to PyTorch CMake @@ -303,10 +303,9 @@ __configure_fbgemm_gpu_build_genai () { __configure_fbgemm_gpu_build_cuda "$fbgemm_variant_targets" || return 1 - # Replace the package_variant flag, since GenAI is also a CUDA-type build - for i in "${!build_args[@]}"; do - build_args[i]="${build_args[i]/--package_variant=cuda/--package_variant=genai}" - done + build_args+=( + --build-target=genai + ) } # shellcheck disable=SC2120 diff --git a/.github/scripts/fbgemm_gpu_install.bash b/.github/scripts/fbgemm_gpu_install.bash index 139c6cdc75..2eabfe635c 100644 --- a/.github/scripts/fbgemm_gpu_install.bash +++ b/.github/scripts/fbgemm_gpu_install.bash @@ -39,12 +39,15 @@ __install_fetch_version_and_variant_info () { echo "[CHECK] Printing out the FBGEMM-GPU version ..." # shellcheck disable=SC2086,SC2155 - installed_fbgemm_gpu_version=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)") + installed_fbgemm_target=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__target__)") # shellcheck disable=SC2086,SC2155 - installed_fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)") + installed_fbgemm_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)") + # shellcheck disable=SC2086,SC2155 + installed_fbgemm_version=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)") echo "################################################################################" - echo "[CHECK] The installed VERSION of FBGEMM_GPU is: ${installed_fbgemm_gpu_version}" - echo "[CHECK] The installed VARIANT of FBGEMM_GPU is: ${installed_fbgemm_gpu_variant}" + echo "[CHECK] The installed FBGEMM TARGET is: ${installed_fbgemm_target}" + echo "[CHECK] The installed FBGEMM VARIANT is: 
${installed_fbgemm_variant}" + echo "[CHECK] The installed FBGEMM VERSION is: ${installed_fbgemm_version}" echo "################################################################################" echo "" } @@ -53,7 +56,7 @@ __install_check_subpackages () { # shellcheck disable=SC2086,SC2155 local fbgemm_gpu_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(dir(fbgemm_gpu))") - if [ "$installed_fbgemm_gpu_variant" == "cuda" ] || [ "$installed_fbgemm_gpu_variant" == "genai" ]; then + if [ "$installed_fbgemm_target" == "genai" ]; then # shellcheck disable=SC2086,SC2155 local experimental_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu.experimental; print(dir(fbgemm_gpu.experimental))") fi @@ -74,7 +77,7 @@ __install_check_subpackages () { "fbgemm_gpu.tbe.cache" ) - if [ "$installed_fbgemm_gpu_variant" != "genai" ]; then + if [ "$installed_fbgemm_target" != "genai" ]; then subpackages+=( "fbgemm_gpu.split_embedding_codegen_lookup_invokers" "fbgemm_gpu.tbe.ssd" @@ -89,7 +92,7 @@ __install_check_subpackages () { __install_check_operator_registrations () { echo "[INSTALL] Check for operator registrations ..." - if [ "$installed_fbgemm_gpu_variant" == "genai" ]; then + if [ "$installed_fbgemm_target" == "genai" ]; then local test_operators=( "torch.ops.fbgemm.nccl_init" "torch.ops.fbgemm.gqa_attn_splitk" diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash index 59ef177745..e2ce5c1ea1 100644 --- a/.github/scripts/fbgemm_gpu_test.bash +++ b/.github/scripts/fbgemm_gpu_test.bash @@ -154,16 +154,16 @@ __setup_fbgemm_gpu_test () { # Configure the environment for ignored test suites for each FBGEMM_GPU # variant - if [ "$fbgemm_gpu_variant" == "cpu" ]; then + if [ "$fbgemm_build_variant" == "cpu" ]; then echo "[TEST] Configuring for CPU-based testing ..." 
__configure_fbgemm_gpu_test_cpu - elif [ "$fbgemm_gpu_variant" == "rocm" ]; then + elif [ "$fbgemm_build_variant" == "rocm" ]; then echo "[TEST] Configuring for ROCm-based testing ..." __configure_fbgemm_gpu_test_rocm else - echo "[TEST] FBGEMM_GPU variant is ${fbgemm_gpu_variant}; configuring for CUDA-based testing ..." + echo "[TEST] FBGEMM_GPU variant is ${fbgemm_build_variant}; configuring for CUDA-based testing ..." __configure_fbgemm_gpu_test_cuda fi @@ -189,9 +189,6 @@ __setup_fbgemm_gpu_test () { echo "[TEST] Checking imports ..." (test_python_import_package "${env_name}" fbgemm_gpu) || return 1 - if [ "$fbgemm_gpu_variant" != "genai" ]; then - (test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1 - fi # Set the feature flags to enable experimental features as needed __set_feature_flags @@ -251,21 +248,19 @@ __run_fbgemm_gpu_tests_in_directory () { __determine_test_directories () { target_directories=() - if [ "$fbgemm_gpu_variant" != "genai" ]; then - target_directories+=( - fbgemm_gpu/test - ) - fi - - if [ "$fbgemm_gpu_variant" == "genai" ]; then + if [ "$fbgemm_build_target" == "genai" ]; then target_directories+=( fbgemm_gpu/experimental/example/test fbgemm_gpu/experimental/gemm/test fbgemm_gpu/experimental/gen_ai/test ) + else + target_directories+=( + fbgemm_gpu/test + ) fi - echo "[TEST] Determined the testing directories:" + echo "[TEST] Determined the test directories:" for test_dir in "${target_directories[@]}"; do echo "$test_dir" done @@ -274,14 +269,10 @@ __determine_test_directories () { test_all_fbgemm_gpu_modules () { env_name="$1" - fbgemm_gpu_variant="$2" if [ "$env_name" == "" ]; then - echo "Usage: ${FUNCNAME[0]} ENV_NAME [FBGEMM_GPU_VARIANT]" + echo "Usage: ${FUNCNAME[0]} ENV_NAME" echo "Example(s):" echo " ${FUNCNAME[0]} build_env # Test all FBGEMM_GPU modules applicable to to the installed variant" - echo " ${FUNCNAME[0]} build_env cpu # Test all FBGEMM_GPU modules applicable to 
CPU" - echo " ${FUNCNAME[0]} build_env cuda # Test all FBGEMM_GPU modules applicable to CUDA" - echo " ${FUNCNAME[0]} build_env rocm # Test all FBGEMM_GPU modules applicable to ROCm" return 1 else echo "################################################################################" @@ -295,14 +286,13 @@ test_all_fbgemm_gpu_modules () { # shellcheck disable=SC2155 local env_prefix=$(env_name_or_prefix "${env_name}") - # Determine the FBGEMM_GPU varaiant if needed - if [ "$fbgemm_gpu_variant" == "" ]; then - echo "[TEST] FBGEMM_GPU variant not explicitly provided by user; will automatically determine from the FBGEMM_GPU installation ..." - # shellcheck disable=SC2086 - fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)") - echo "[TEST] Determined FBGEMM_GPU variant from installation: ${fbgemm_gpu_variant}" - echo "[TEST] Will be running tests specific to this variant ..." - fi + # Determine the FBGEMM build target and variant + # shellcheck disable=SC2086 + fbgemm_build_target=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__target__)") + # shellcheck disable=SC2086 + fbgemm_build_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)") + echo "[TEST] Determined FBGEMM_GPU (target : variant) from installation: (${fbgemm_build_target} : ${fbgemm_build_variant})" + echo "[TEST] Will be running tests specific to this target and variant ..." 
# Determine the test directories to include for testing __determine_test_directories @@ -312,9 +302,9 @@ test_all_fbgemm_gpu_modules () { # Iterate through the test directories and run bulk tests for test_dir in "${target_directories[@]}"; do - cd "${test_dir}" || return 1 - __run_fbgemm_gpu_tests_in_directory "${env_name}" "${fbgemm_gpu_variant}" || return 1 - cd - || return 1 + cd "${test_dir}" || return 1 + __run_fbgemm_gpu_tests_in_directory "${env_name}" || return 1 + cd - || return 1 done } diff --git a/.github/scripts/nova_postscript.bash b/.github/scripts/nova_postscript.bash index 40448b2f55..92827ec355 100644 --- a/.github/scripts/nova_postscript.bash +++ b/.github/scripts/nova_postscript.bash @@ -66,7 +66,7 @@ fi $CONDA_RUN python3 -c "import torch; print('cuda.is_available() ', torch.cuda.is_available()); print ('device_count() ',torch.cuda.device_count());" cd "${FBGEMM_REPO}" || { echo "[NOVA] Failed to cd to ${FBGEMM_REPO} from $(pwd)"; }; -test_all_fbgemm_gpu_modules "${BUILD_ENV_NAME}" "${fbgemm_variant}" +test_all_fbgemm_gpu_modules "${BUILD_ENV_NAME}" end_time=$(date +%s) runtime=$((end_time-start_time)) start_time=${end_time} diff --git a/fbgemm_gpu/docs/src/fbgemm_genai/development/BuildInstructions.rst b/fbgemm_gpu/docs/src/fbgemm_genai/development/BuildInstructions.rst index 64a51a0f92..3999273951 100644 --- a/fbgemm_gpu/docs/src/fbgemm_genai/development/BuildInstructions.rst +++ b/fbgemm_gpu/docs/src/fbgemm_genai/development/BuildInstructions.rst @@ -147,7 +147,8 @@ toolchains have been properly installed. # Build the wheel artifact only python setup.py bdist_wheel \ - --package_variant=genai \ + --build-target=genai \ + --build-variant=cuda \ --python-tag="${python_tag}" \ --plat-name="${python_plat_name}" \ --nvml_lib_path=${NVML_LIB_PATH} \ @@ -156,11 +157,57 @@ toolchains have been properly installed. 
# Build and install the library into the Conda environment python setup.py install \ - --package_variant=genai \ + --build-target=genai \ + --build-variant=cuda \ --nvml_lib_path=${NVML_LIB_PATH} \ --nccl_lib_path=${NCCL_LIB_PATH} \ -DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}" +.. _fbgemm-gpu.build.process.rocm: + +ROCm Build +---------- + +For ROCm builds, ``ROCM_PATH`` and ``PYTORCH_ROCM_ARCH`` need to be specified. +The presence of a ROCm device, however, is not required for building +the package. + +Similar to CUDA builds, building with Clang + ``libstdc++`` can be enabled by +appending ``--cxxprefix=$CONDA_PREFIX`` to the build command, presuming the +toolchains have been properly installed. + +.. code:: sh + + # !! Run in fbgemm_gpu/ directory inside the Conda environment !! + + export ROCM_PATH=/path/to/rocm + + # [OPTIONAL] Enable verbose HIPCC logs + export HIPCC_VERBOSE=1 + + # Build for the target architecture of the ROCm device installed on the machine (e.g. 'gfx908,gfx90a,gfx942') + # See https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html for list + export PYTORCH_ROCM_ARCH=$(${ROCM_PATH}/bin/rocminfo | grep -o -m 1 'gfx.*') + + # Build the wheel artifact only + python setup.py bdist_wheel \ + --build-target=genai \ + --build-variant=rocm \ + --python-tag="${python_tag}" \ + --plat-name="${python_plat_name}" \ + -DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \ + -DHIP_ROOT_DIR="${ROCM_PATH}" \ + -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ + -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" + + # Build and install the library into the Conda environment + python setup.py install \ + --build-target=genai \ + --build-variant=rocm \ + -DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \ + -DHIP_ROOT_DIR="${ROCM_PATH}" \ + -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ + -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" Post-Build Checks (For Developers) ---------------------------------- diff --git a/fbgemm_gpu/docs/src/fbgemm_gpu/development/BuildInstructions.rst 
b/fbgemm_gpu/docs/src/fbgemm_gpu/development/BuildInstructions.rst index 2a8332ea2f..1147107e23 100644 --- a/fbgemm_gpu/docs/src/fbgemm_gpu/development/BuildInstructions.rst +++ b/fbgemm_gpu/docs/src/fbgemm_gpu/development/BuildInstructions.rst @@ -542,16 +542,16 @@ For CPU-only builds, the ``--cpu_only`` flag needs to be specified. # Build the wheel artifact only python setup.py bdist_wheel \ - --package_variant=cpu \ + --build-variant=cpu \ --python-tag="${python_tag}" \ --plat-name="${python_plat_name}" # Build and install the library into the Conda environment (GCC) python setup.py install \ - --package_variant=cpu + --build-variant=cpu # NOTE: To build the package as part of generating the documentation, use - # `--package_variant=docs` flag instead! + # `--build-variant=docs` flag instead! To build using Clang + ``libstdc++`` instead of GCC, simply append the ``--cxxprefix`` flag: @@ -562,14 +562,14 @@ To build using Clang + ``libstdc++`` instead of GCC, simply append the # Build the wheel artifact only python setup.py bdist_wheel \ - --package_variant=cpu \ + --build-variant=cpu \ --python-tag="${python_tag}" \ --plat-name="${python_plat_name}" \ --cxxprefix=$CONDA_PREFIX # Build and install the library into the Conda environment (Clang) python setup.py install \ - --package_variant=cpu + --build-variant=cpu --cxxprefix=$CONDA_PREFIX Note that this presumes the Clang toolchain is properly installed along with the @@ -642,7 +642,7 @@ toolchains have been properly installed. # Build the wheel artifact only python setup.py bdist_wheel \ - --package_variant=cuda \ + --build-variant=cuda \ --python-tag="${python_tag}" \ --plat-name="${python_plat_name}" \ --nvml_lib_path=${NVML_LIB_PATH} \ @@ -651,7 +651,7 @@ toolchains have been properly installed. 
# Build and install the library into the Conda environment python setup.py install \ - --package_variant=cuda \ + --build-variant=cuda \ --nvml_lib_path=${NVML_LIB_PATH} \ --nccl_lib_path=${NCCL_LIB_PATH} \ -DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}" @@ -684,7 +684,7 @@ presuming the toolchains have been properly installed. # Build the wheel artifact only python setup.py bdist_wheel \ - --package_variant=rocm \ + --build-variant=rocm \ --python-tag="${python_tag}" \ --plat-name="${python_plat_name}" \ -DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \ @@ -694,7 +694,7 @@ presuming the toolchains have been properly installed. # Build and install the library into the Conda environment python setup.py install \ - --package_variant=rocm \ + --build-variant=rocm \ -DAMDGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \ -DHIP_ROOT_DIR="${ROCM_PATH}" \ -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ diff --git a/fbgemm_gpu/fbgemm_gpu/__init__.py b/fbgemm_gpu/fbgemm_gpu/__init__.py index 2422daa296..8b57d68019 100644 --- a/fbgemm_gpu/fbgemm_gpu/__init__.py +++ b/fbgemm_gpu/fbgemm_gpu/__init__.py @@ -31,10 +31,15 @@ def _load_library(filename: str, no_throw: bool = False) -> None: try: # Export the version string from the version file auto-generated by setup.py - from fbgemm_gpu.docs.version import __variant__, __version__ # noqa: F401, E402 + from fbgemm_gpu.docs.version import ( # noqa: F401, E402 + __target__, + __variant__, + __version__, + ) except Exception: __variant__: str = "INTERNAL" __version__: str = "INTERNAL" + __target__: str = "INTERNAL" fbgemm_gpu_libraries = [ "fbgemm_gpu_config", @@ -52,7 +57,7 @@ def _load_library(filename: str, no_throw: bool = False) -> None: "fbgemm_gpu_py", ] -fbgemm_gpu_genai_libraries = [ +fbgemm_genai_libraries = [ "experimental/gen_ai/fbgemm_gpu_experimental_gen_ai", ] @@ -64,17 +69,14 @@ def _load_library(filename: str, no_throw: bool = False) -> None: # .SO file for the ROCm case, so that clients can import # fbgemm_gpu.experimental.gemm without triggering an 
error. if torch.cuda.is_available() and torch.version.hip: - fbgemm_gpu_genai_libraries = [] + fbgemm_genai_libraries = [] libraries_to_load = { - "cpu": fbgemm_gpu_libraries, - "docs": fbgemm_gpu_libraries, - "cuda": fbgemm_gpu_libraries, - "genai": fbgemm_gpu_genai_libraries, - "rocm": fbgemm_gpu_libraries, + "default": fbgemm_gpu_libraries, + "genai": fbgemm_genai_libraries, } -for library in libraries_to_load.get(__variant__, []): +for library in libraries_to_load.get(__target__, []): # NOTE: In all cases, we want to throw an error if we cannot load the # library. However, this appears to break the OSS documentation build, # where the Python documentation doesn't show up in the generated docs. diff --git a/fbgemm_gpu/setup.py b/fbgemm_gpu/setup.py index 5b3aa80099..cb03df3986 100644 --- a/fbgemm_gpu/setup.py +++ b/fbgemm_gpu/setup.py @@ -7,6 +7,7 @@ # @licenselint-loose-mode import argparse +import logging import os import re import subprocess @@ -23,6 +24,8 @@ from skbuild import setup from tabulate import tabulate +logging.basicConfig(level=logging.INFO) + @dataclass(frozen=True) class FbgemmGpuBuild: @@ -50,9 +53,16 @@ def from_args(cls, argv: List[str]): help="Print build information only.", ) parser.add_argument( - "--package_variant", + "--build-target", + type=str, + choices=["default", "genai"], + default="default", + help="The FBGEMM build target to build.", + ) + parser.add_argument( + "--build-variant", type=str, - choices=["docs", "cpu", "cuda", "rocm", "genai"], + choices=["docs", "cpu", "cuda", "rocm"], default="cuda", help="The FBGEMM_GPU variant to build.", ) @@ -102,37 +112,30 @@ def nova_flag(self) -> Optional[int]: else: return None - def package_name(self) -> str: - pkg_name: str = "fbgemm_gpu" + def nova_non_prebuild_step(self) -> bool: + # When running in Nova workflow context, the actual package build is run + # in the Nova CI's "pre-script" step, as denoted by the `BUILD_FROM_NOVA` + # flag. 
As such, we skip building in the clean and build wheel steps. + return self.nova_flag() == 1 - if self.nova_flag() == 1: - # When running in Nova workflow context, the actual package build is - # run in the Nova CI's "pre-script" step, as denoted by the - # `BUILD_FROM_NOVA` flag. As such, we skip building in the clean - # and build wheel steps. - print( - "[SETUP.PY] Running under Nova workflow context (clean or build wheel step) ... exiting" - ) - sys.exit(0) + def target(self) -> str: + return self.args.build_target - elif self.nova_flag() == 0: - # In Nova, we are publishing genai packages separately from the main - # fbgemm_gpu package, so if the package variant is genai, we need to - # update the package name accordingly. Otherwise, the package name - # is the same for all other build variants in Nova - if self.args.package_variant == "genai": - pkg_name = "fbgemm_gpu_genai" + def variant(self) -> str: + return self.args.build_variant - else: + def package_name(self) -> str: + pkg_name: str = "fbgemm_gpu_genai" if self.target() == "genai" else "fbgemm_gpu" + + if self.nova_flag() is None: # If running outside of Nova workflow context, append the channel # and variant to the package name as needed if self.args.package_channel != "release": pkg_name += f"_{self.args.package_channel}" - if self.args.package_variant != "cuda": - pkg_name += f"-{self.args.package_variant}" + if self.variant() != "cuda": + pkg_name += f"-{self.variant()}" - print(f"[SETUP.PY] Determined the Python package name: '{pkg_name}'") return pkg_name def variant_version(self) -> str: @@ -143,25 +146,24 @@ def variant_version(self) -> str: # `python setup.py`, this script is invoked twice, once as # `setup.py egg_info`, and once as `setup.py bdist_wheel`. # Ignore determining the variant_version for the first case. 
- print( + logging.debug( "[SETUP.PY] Script was invoked as `setup.py egg_info`, ignoring variant_version" ) - return pkg_vver + return "" elif self.nova_flag() is None: - # If not running in a Nova workflow, then use the - # `fbgemm_gpu-` naming convention for the package, since - # PyPI does not accept version+xx in the naming convention. - print( + # If not running in a Nova workflow, ignore the variant version and + # use the `fbgemm_gpu-` package naming convention instead, + # since PyPI does not accept version+xx in the naming convention. + logging.debug( "[SETUP.PY] Not running under Nova workflow context; ignoring variant_version" ) - return pkg_vver + return "" # NOTE: This is a workaround for the fact that we currently overload # package target (e.g. GPU, GenAI), and variant (e.g. CPU, CUDA, ROCm) - # into the same `package_variant` variable, and should be fixed soon. - if self.args.package_variant == "cuda" or self.args.package_variant == "genai": - CudaUtils.set_cuda_environment_variables() + # into the same `build_variant` variable, and should be fixed soon. + if self.variant() == "cuda": if torch.version.cuda is not None: cuda_version = torch.version.cuda.split(".") pkg_vver = f"+cu{cuda_version[0]}{cuda_version[1]}" @@ -170,7 +172,7 @@ def variant_version(self) -> str: "[SETUP.PY] The installed PyTorch variant is not CUDA; cannot determine the CUDA version!" 
) - elif self.args.package_variant == "rocm": + elif self.variant() == "rocm": if torch.version.hip is not None: rocm_version = torch.version.hip.split(".") # NOTE: Unlike CUDA-based releases, which ignores the minor patch version, @@ -190,20 +192,19 @@ def variant_version(self) -> str: else: pkg_vver = "+cpu" - print(f"[SETUP.PY] Extracted the package variant+version: '{pkg_vver}'") return pkg_vver def package_version(self): pkg_vver = self.variant_version() - print("[SETUP.PY] Extracting the package version ...") - print( + logging.debug("[SETUP.PY] Extracting the package version ...") + logging.debug( f"[SETUP.PY] TAG: {gitversion.get_tag()}, BRANCH: {gitversion.get_branch()}, SHA: {gitversion.get_sha()}" ) if self.args.package_channel == "nightly": # Use date stamp for nightly versions - print( + logging.debug( "[SETUP.PY] Package is for NIGHTLY; using timestamp for the versioning" ) today = date.today() @@ -241,12 +242,12 @@ def package_version(self): ) full_version_string = f"{pkg_version}{pkg_vver}" - print( + logging.debug( f"[SETUP.PY] Setting the full package version string: {full_version_string}" ) return full_version_string - def cmake_args(self) -> None: + def cmake_args(self) -> List[str]: def _get_cxx11_abi(): try: value = int(torch._C._GLIBCXX_USE_CXX11_ABI) @@ -257,7 +258,7 @@ def _get_cxx11_abi(): return f"-D_GLIBCXX_USE_CXX11_ABI={value}" torch_root = os.path.dirname(torch.__file__) - os.environ["CMAKE_BUILD_PARALLEL_LEVEL"] = str(os.cpu_count() // 2) + os.environ["CMAKE_BUILD_PARALLEL_LEVEL"] = str((os.cpu_count() or 4) // 2) cmake_args = [ f"-DCMAKE_PREFIX_PATH={torch_root}", @@ -267,36 +268,31 @@ def _get_cxx11_abi(): cxx_flags = [] if self.args.verbose: - print("[SETUP.PY] Building in VERBOSE mode ...") + # Enable verbose logging in CMake cmake_args.extend( ["-DCMAKE_VERBOSE_MAKEFILE=ON", "-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE"] ) if self.args.debug: - # Enable device-side assertions in CUDA and HIP + # Enable torch device-side assertions for 
CUDA and HIP # https://stackoverflow.com/questions/44284275/passing-compiler-options-in-cmake-command-line cxx_flags.extend(["-DTORCH_USE_CUDA_DSA", "-DTORCH_USE_HIP_DSA"]) - if self.args.package_variant in ["docs", "cpu"]: - # NOTE: The docs variant is a fake variant that is effectively the - # cpu variant, but marks __VARIANT__ as "docs" instead of "cpu". - # - # This minor change lets the library loader know not throw - # exceptions on failed load, which is the workaround for a bug in - # the Sphinx documentation generation process, see: - # - # https://github.com/pytorch/FBGEMM/pull/3477 - # https://github.com/pytorch/FBGEMM/pull/3717 - print("[SETUP.PY] Building the CPU ...") - cmake_args.append("-DFBGEMM_BUILD_VARIANT=cpu") - - if self.args.package_variant == "rocm": - print("[SETUP.PY] Building the ROCm variant ...") - cmake_args.append("-DFBGEMM_BUILD_VARIANT=rocm") - - if self.args.package_variant == "genai": - print("[SETUP.PY] Building the GENAI-ONLY variant of FBGEMM_GPU ...") - cmake_args.append("-DFBGEMM_BUILD_TARGET=genai") + print(f"[SETUP.PY] Setting the FBGEMM build target: {self.target()} ...") + cmake_args.append(f"-DFBGEMM_BUILD_TARGET={self.target()}") + + # NOTE: The docs variant is a fake variant that is effectively the + # cpu variant, but marks __VARIANT__ as "docs" instead of "cpu". 
+ # + # This minor change lets the library loader know not to throw + # exceptions on failed load, which is the workaround for a bug in + # the Sphinx documentation generation process, see: + # + # https://github.com/pytorch/FBGEMM/pull/3477 + # https://github.com/pytorch/FBGEMM/pull/3717 + cmake_bvariant = "cpu" if self.variant() == "docs" else self.variant() + print(f"[SETUP.PY] Setting the FBGEMM build variant: {cmake_bvariant} ...") + cmake_args.append(f"-DFBGEMM_BUILD_VARIANT={cmake_bvariant}") if self.args.nvml_lib_path: cmake_args.append(f"-DNVML_LIB_PATH={self.args.nvml_lib_path}") @@ -316,7 +312,7 @@ def _get_cxx11_abi(): cmake_args.append("-DUSE_FB_ONLY=ON") if self.args.cxxprefix: - print("[SETUP.PY] Setting CMake flags ...") + logging.debug("[SETUP.PY] Setting CMake flags ...") path = self.args.cxxprefix cxx_flags.extend( @@ -353,7 +349,7 @@ class CudaUtils: """CUDA Utilities""" @classmethod - def nvcc_ok(cls, cuda_home: str, major: int, minor: int) -> bool: + def nvcc_ok(cls, cuda_home: Optional[str], major: int, minor: int) -> bool: if not cuda_home: return False @@ -455,7 +451,8 @@ def generate_version_file(cls, build: FbgemmGpuBuild) -> None: # LICENSE file in the root directory of this source tree. __version__: str = "{package_version}" - __variant__: str = "{build.args.package_variant}" + __target__: str = "{build.target()}" + __variant__: str = "{build.variant()}" """ ) file.write(text) @@ -521,9 +518,20 @@ def run(self): def main(argv: List[str]) -> None: # Handle command line args before passing to main setup() method. build = FbgemmGpuBuild.from_args(argv) - # Repair command line args for setup. + # Repair command line args for setup() method. sys.argv = [sys.argv[0]] + build.other_args + # Skip the build step if running under Nova non-prebuild step + if build.nova_non_prebuild_step(): + print( + "[SETUP.PY] Running under Nova workflow context (clean or build wheel step) ... 
exiting" + ) + sys.exit(0) + + # Set the CUDA environment variables if needed + if build.variant() == "cuda": + CudaUtils.set_cuda_environment_variables() + # Extract the package name package_name = build.package_name() @@ -532,9 +540,8 @@ def main(argv: List[str]) -> None: if build.args.dryrun: print( - f"[SETUP.PY] Extracted package name and version: ({package_name} : {package_version})" + f"[SETUP.PY] Determined the package name and variant+version: ({package_name} : {package_version})\n" ) - print("") sys.exit(0) # Generate the version file