From 35984fdeb1e5fa5926bc017f6f1c7fb2165ba398 Mon Sep 17 00:00:00 2001
From: Benson Ma
Date: Tue, 29 Apr 2025 23:48:00 -0700
Subject: [PATCH] Remove `sm_100` and `sm_120` (#4024)

Summary:
X-link: https://github.com/facebookresearch/FBGEMM/pull/1133

- Remove sm_100 and sm_120 from the architectures list, keeping just sm_100a
  and sm_120a, to enable compilation of the FP4 CUTLASS quantization kernels
  (https://github.com/pytorch/FBGEMM/pull/4004), since we are running into
  the following error:

```
Instruction 'cvt with .e2m1x2' not supported on .target 'sm_100'
```

Reviewed By: spcyppt

Differential Revision: D73901832

Pulled By: q10
---
 .github/scripts/nova_dir.bash            |  2 +-
 fbgemm_gpu/docs/src/general/Releases.rst | 55 ++++++++++++------------
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/.github/scripts/nova_dir.bash b/.github/scripts/nova_dir.bash
index 996734f376..38aae693ce 100644
--- a/.github/scripts/nova_dir.bash
+++ b/.github/scripts/nova_dir.bash
@@ -23,7 +23,7 @@ fi
 if [[ "$CONDA_ENV" != "" ]]; then export CONDA_RUN="conda run --no-capture-output -p ${CONDA_ENV}" && echo "$CONDA_RUN"; fi
 
 if [[ "$CU_VERSION" == "cu128" ]]; then
-    export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;9.0a;10.0;10.0a;12.0;12.0a"
+    export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;9.0a;10.0a;12.0a"
     echo "Set TORCH_CUDA_ARCH_LIST to: ${TORCH_CUDA_ARCH_LIST}"
 
 elif [[ "$CU_VERSION" == "cu126" ]] ||
diff --git a/fbgemm_gpu/docs/src/general/Releases.rst b/fbgemm_gpu/docs/src/general/Releases.rst
index 2d73075b6b..2ae712d20a 100644
--- a/fbgemm_gpu/docs/src/general/Releases.rst
+++ b/fbgemm_gpu/docs/src/general/Releases.rst
@@ -10,34 +10,33 @@ FBGEMM is released in accordance to the PyTorch release schedule, and is
 each release has no guarantee to work in conjunction with PyTorch releases
 that are older than the one that the FBGEMM release corresponds to.
 
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| FBGEMM Release  | Corresponding    | Supported        | Supported      | Supported CUDA       | (Experimental) Supported  | (Experimental) Supported  |
-|                 | PyTorch Release  | Python Versions  | CUDA Versions  | Architectures        | ROCm Versions             | ROCm Architectures        |
-+=================+==================+==================+================+======================+===========================+===========================+
-| 1.2.0           | 2.7.x            | 3.9, 3.10, 3.11, | 11.8, 12.6,    | 7.0, 8.0, 9.0, 9.0a, | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
-|                 |                  | 3.12, 3.13       | 12.8           | 10.0, 10.0a, 12.0,   |                           |                           |
-|                 |                  |                  |                | 12.0a                |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 1.1.0           | 2.6.x            | 3.9, 3.10, 3.11, | 11.8, 12.4,    | 7.0, 8.0, 9.0,       | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
-|                 |                  | 3.12, 3.13       | 12.6           | 9.0a                 |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 1.0.0           | 2.5.x            | 3.9, 3.10, 3.11, | 11.8, 12.1,    | 7.0, 8.0, 9.0,       | 6.0, 6.1                  | gfx908, gfx90a            |
-|                 |                  | 3.12             | 12.4           | 9.0a                 |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.8.0           | 2.4.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1,    | 7.0, 8.0, 9.0,       | 6.0, 6.1                  | gfx908, gfx90a            |
-|                 |                  | 3.11, 3.12       | 12.4           | 9.0a                 |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.7.0           | 2.3.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0        | 6.0                       | gfx908, gfx90a            |
-|                 |                  | 3.11, 3.12       |                |                      |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.6.0           | 2.2.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0        | 5.7                       | gfx90a                    |
-|                 |                  | 3.11, 3.12       |                |                      |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.5.0           | 2.1.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0        | 5.5, 5.6                  | gfx90a                    |
-|                 |                  | 3.11             |                |                      |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.4.0           | 2.0.x            | 3.8, 3.9, 3.10   | 11.7, 11.8     | 7.0, 8.0             | 5.3, 5.4                  | gfx90a                    |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| FBGEMM Release  | Corresponding    | Supported        | Supported      | Supported CUDA     | (Experimental) Supported  | (Experimental) Supported  |
+|                 | PyTorch Release  | Python Versions  | CUDA Versions  | Architectures      | ROCm Versions             | ROCm Architectures        |
++=================+==================+==================+================+====================+===========================+===========================+
+| 1.2.0           | 2.7.x            | 3.9, 3.10, 3.11, | 11.8, 12.6,    | 7.0, 8.0, 9.0,     | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
+|                 |                  | 3.12, 3.13       | 12.8           | 9.0a, 10.0a, 12.0a |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 1.1.0           | 2.6.x            | 3.9, 3.10, 3.11, | 11.8, 12.4,    | 7.0, 8.0, 9.0,     | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
+|                 |                  | 3.12, 3.13       | 12.6           | 9.0a               |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 1.0.0           | 2.5.x            | 3.9, 3.10, 3.11, | 11.8, 12.1,    | 7.0, 8.0, 9.0,     | 6.0, 6.1                  | gfx908, gfx90a            |
+|                 |                  | 3.12             | 12.4           | 9.0a               |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.8.0           | 2.4.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1,    | 7.0, 8.0, 9.0,     | 6.0, 6.1                  | gfx908, gfx90a            |
+|                 |                  | 3.11, 3.12       | 12.4           | 9.0a               |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.7.0           | 2.3.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0      | 6.0                       | gfx908, gfx90a            |
+|                 |                  | 3.11, 3.12       |                |                    |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.6.0           | 2.2.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0      | 5.7                       | gfx90a                    |
+|                 |                  | 3.11, 3.12       |                |                    |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.5.0           | 2.1.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0      | 5.5, 5.6                  | gfx90a                    |
+|                 |                  | 3.11             |                |                    |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.4.0           | 2.0.x            | 3.8, 3.9, 3.10   | 11.7, 11.8     | 7.0, 8.0           | 5.3, 5.4                  | gfx90a                    |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
 
 Note that the list of supported CUDA and ROCm architectures refer to the
 targets support available in the default installation packages, and that building for
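
As a usage note, not part of the patch itself: the following is a minimal sketch of how a local FBGEMM_GPU source build against CUDA 12.8 might mirror the trimmed architecture list above. The build step is left as a placeholder, since this patch only changes the CI environment variable.

```bash
# Minimal sketch (assumption: building FBGEMM_GPU from source against CUDA 12.8).
# Mirror the cu128 setting from .github/scripts/nova_dir.bash so that nvcc
# targets only the architecture-specific "a" variants on Blackwell (10.0a,
# 12.0a); per the error quoted in the commit message, the baseline
# sm_100/sm_120 targets reject 'cvt with .e2m1x2', which the FP4 CUTLASS
# quantization kernels emit.
export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;9.0a;10.0a;12.0a"
echo "Set TORCH_CUDA_ARCH_LIST to: ${TORCH_CUDA_ARCH_LIST}"

# Placeholder: run your usual FBGEMM_GPU build command here; it reads
# TORCH_CUDA_ARCH_LIST from the environment.
```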