From 35984fdeb1e5fa5926bc017f6f1c7fb2165ba398 Mon Sep 17 00:00:00 2001
From: Benson Ma
Date: Tue, 29 Apr 2025 23:48:00 -0700
Subject: [PATCH] Remove `sm_100` and `sm_120` (#4024)

Summary:
X-link: https://github.com/facebookresearch/FBGEMM/pull/1133

- Remove sm_100 and sm_120 from the architectures list, keeping just sm_100a
  and sm_120a, to enable compilation of the FP4 CUTLASS quantization kernels
  (https://github.com/pytorch/FBGEMM/pull/4004), since we are running into
  the following error:

```
Instruction 'cvt with .e2m1x2' not supported on .target 'sm_100'
```

Reviewed By: spcyppt

Differential Revision: D73901832

Pulled By: q10
---
 .github/scripts/nova_dir.bash            |  2 +-
 fbgemm_gpu/docs/src/general/Releases.rst | 55 ++++++++++++------------
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/.github/scripts/nova_dir.bash b/.github/scripts/nova_dir.bash
index 996734f376..38aae693ce 100644
--- a/.github/scripts/nova_dir.bash
+++ b/.github/scripts/nova_dir.bash
@@ -23,7 +23,7 @@ fi
 if [[ "$CONDA_ENV" != "" ]]; then export CONDA_RUN="conda run --no-capture-output -p ${CONDA_ENV}" && echo "$CONDA_RUN"; fi
 
 if [[ "$CU_VERSION" == "cu128" ]]; then
-    export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;9.0a;10.0;10.0a;12.0;12.0a"
+    export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;9.0a;10.0a;12.0a"
     echo "Set TORCH_CUDA_ARCH_LIST to: ${TORCH_CUDA_ARCH_LIST}"
 
 elif [[ "$CU_VERSION" == "cu126" ]] ||
diff --git a/fbgemm_gpu/docs/src/general/Releases.rst b/fbgemm_gpu/docs/src/general/Releases.rst
index 2d73075b6b..2ae712d20a 100644
--- a/fbgemm_gpu/docs/src/general/Releases.rst
+++ b/fbgemm_gpu/docs/src/general/Releases.rst
@@ -10,34 +10,33 @@ FBGEMM is released in accordance to the PyTorch release schedule, and is
 each release has no guarantee to work in conjunction with PyTorch releases
 that are older than the one that the FBGEMM release corresponds to.
 
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| FBGEMM Release  | Corresponding    | Supported        | Supported      | Supported CUDA       | (Experimental) Supported  | (Experimental) Supported  |
-|                 | PyTorch Release  | Python Versions  | CUDA Versions  | Architectures        | ROCm Versions             | ROCm Architectures        |
-+=================+==================+==================+================+======================+===========================+===========================+
-| 1.2.0           | 2.7.x            | 3.9, 3.10, 3.11, | 11.8, 12.6,    | 7.0, 8.0, 9.0, 9.0a, | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
-|                 |                  | 3.12, 3.13       | 12.8           | 10.0, 10.0a, 12.0,   |                           |                           |
-|                 |                  |                  |                | 12.0a                |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 1.1.0           | 2.6.x            | 3.9, 3.10, 3.11, | 11.8, 12.4,    | 7.0, 8.0, 9.0,       | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
-|                 |                  | 3.12, 3.13       | 12.6           | 9.0a                 |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 1.0.0           | 2.5.x            | 3.9, 3.10, 3.11, | 11.8, 12.1,    | 7.0, 8.0, 9.0,       | 6.0, 6.1                  | gfx908, gfx90a            |
-|                 |                  | 3.12             | 12.4           | 9.0a                 |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.8.0           | 2.4.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1,    | 7.0, 8.0, 9.0,       | 6.0, 6.1                  | gfx908, gfx90a            |
-|                 |                  | 3.11, 3.12       | 12.4           | 9.0a                 |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.7.0           | 2.3.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0        | 6.0                       | gfx908, gfx90a            |
-|                 |                  | 3.11, 3.12       |                |                      |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.6.0           | 2.2.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0        | 5.7                       | gfx90a                    |
-|                 |                  | 3.11, 3.12       |                |                      |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.5.0           | 2.1.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0        | 5.5, 5.6                  | gfx90a                    |
-|                 |                  | 3.11             |                |                      |                           |                           |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
-| 0.4.0           | 2.0.x            | 3.8, 3.9, 3.10   | 11.7, 11.8     | 7.0, 8.0             | 5.3, 5.4                  | gfx90a                    |
-+-----------------+------------------+------------------+----------------+----------------------+---------------------------+---------------------------+
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| FBGEMM Release  | Corresponding    | Supported        | Supported      | Supported CUDA     | (Experimental) Supported  | (Experimental) Supported  |
+|                 | PyTorch Release  | Python Versions  | CUDA Versions  | Architectures      | ROCm Versions             | ROCm Architectures        |
++=================+==================+==================+================+====================+===========================+===========================+
+| 1.2.0           | 2.7.x            | 3.9, 3.10, 3.11, | 11.8, 12.6,    | 7.0, 8.0, 9.0,     | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
+|                 |                  | 3.12, 3.13       | 12.8           | 9.0a, 10.0a, 12.0a |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 1.1.0           | 2.6.x            | 3.9, 3.10, 3.11, | 11.8, 12.4,    | 7.0, 8.0, 9.0,     | 6.1, 6.2.4, 6.3           | gfx908, gfx90a, gfx942    |
+|                 |                  | 3.12, 3.13       | 12.6           | 9.0a               |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 1.0.0           | 2.5.x            | 3.9, 3.10, 3.11, | 11.8, 12.1,    | 7.0, 8.0, 9.0,     | 6.0, 6.1                  | gfx908, gfx90a            |
+|                 |                  | 3.12             | 12.4           | 9.0a               |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.8.0           | 2.4.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1,    | 7.0, 8.0, 9.0,     | 6.0, 6.1                  | gfx908, gfx90a            |
+|                 |                  | 3.11, 3.12       | 12.4           | 9.0a               |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.7.0           | 2.3.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0      | 6.0                       | gfx908, gfx90a            |
+|                 |                  | 3.11, 3.12       |                |                    |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.6.0           | 2.2.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0      | 5.7                       | gfx90a                    |
+|                 |                  | 3.11, 3.12       |                |                    |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.5.0           | 2.1.x            | 3.8, 3.9, 3.10,  | 11.8, 12.1     | 7.0, 8.0, 9.0      | 5.5, 5.6                  | gfx90a                    |
+|                 |                  | 3.11             |                |                    |                           |                           |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
+| 0.4.0           | 2.0.x            | 3.8, 3.9, 3.10   | 11.7, 11.8     | 7.0, 8.0           | 5.3, 5.4                  | gfx90a                    |
++-----------------+------------------+------------------+----------------+--------------------+---------------------------+---------------------------+
 
 Note that the list of supported CUDA and ROCm architectures refer to the
 targets support available in the default installation packages, and that building for
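
As a usage note, not part of the patch itself: the following is a minimal sketch of how a local FBGEMM_GPU source build against CUDA 12.8 might mirror the trimmed architecture list above. The build step is left as a placeholder, since this patch only changes the CI environment variable.

```bash
# Minimal sketch (assumption: building FBGEMM_GPU from source against CUDA 12.8).
# Mirror the cu128 setting from .github/scripts/nova_dir.bash so that nvcc
# targets only the architecture-specific "a" variants on Blackwell (10.0a,
# 12.0a); per the error quoted in the commit message, the baseline
# sm_100/sm_120 targets reject 'cvt with .e2m1x2', which the FP4 CUTLASS
# quantization kernels emit.
export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;9.0a;10.0a;12.0a"
echo "Set TORCH_CUDA_ARCH_LIST to: ${TORCH_CUDA_ARCH_LIST}"

# Placeholder: run your usual FBGEMM_GPU build command here; it reads
# TORCH_CUDA_ARCH_LIST from the environment.
```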