Commit 1d48963

NVIDIA arch-specific cuda and torch versions
1 parent 1768cd9 commit 1d48963

File tree

3 files changed (+90, -20 lines)

README.md

Lines changed: 4 additions & 2 deletions
@@ -77,9 +77,11 @@ The list of platforms on which `torchruntime` can install a working variant of P
 | 40xx | ✅ Yes | Win/Linux | Uses CUDA 12.8 |
 | 30xx | ✅ Yes | Win/Linux | Uses CUDA 12.8 |
 | 20xx | ✅ Yes | Win/Linux | Uses CUDA 12.8 |
-| 10xx/16xx | ✅ Yes | Win/Linux | Uses CUDA 12.8. Full-precision required on 16xx series |
+| 16xx | ✅ Yes | Win/Linux | Uses CUDA 12.8. Requires full-precision for image generation |
+| 10xx | ✅ Yes | Win/Linux | Uses CUDA 12.4 |
+| 7xx | ✅ Yes | Win/Linux | Uses CUDA 11.8 |
 
-**Note:** We use CUDA 12.4 for Python 3.8, since torch dropped support for Python 3.8 after torch 2.4.
+**Note:** Torch dropped support for Python 3.8 starting with torch 2.5, so torchruntime falls back to CUDA 12.4 when Python 3.8 is used.
 
 ### AMD
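As a quick illustration of the behavior documented by this table and note, here is a small standalone sketch. `pick_cuda_variant` is a hypothetical helper, not part of torchruntime; the real selection logic lives in `torchruntime/platform_detection.py`, changed below.

```python
import sys

def pick_cuda_variant(series: str) -> str:
    """Illustrative only: GPU series -> CUDA build tag, per the README table."""
    if series == "7xx":
        return "cu118"  # Kepler-era cards stay on CUDA 11.8
    if series == "10xx":
        return "cu124"  # Pascal cards use CUDA 12.4
    # 16xx and newer use CUDA 12.8, except on Python 3.8: torch >= 2.5 dropped
    # Python 3.8, so torchruntime falls back to CUDA 12.4 there.
    # (50xx additionally requires Python >= 3.9 altogether.)
    if sys.version_info < (3, 9):
        return "cu124"
    return "cu128"

print(pick_cuda_variant("30xx"))  # "cu128" on Python >= 3.9
```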

tests/test_platform_detection.py

Lines changed: 48 additions & 8 deletions
@@ -94,26 +94,20 @@ def test_amd_gpu_mac(monkeypatch):
     assert get_torch_platform(gpu_infos) == "mps"
 
 
-def test_nvidia_gpu_windows(monkeypatch, capsys):
+def test_nvidia_gpu_windows(monkeypatch):
     monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
     monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")
     gpu_infos = [GPU(NVIDIA, "NVIDIA", 0x1234, "GeForce", True)]
     expected = "cu124" if py_version < (3, 9) else "cu128"
     assert get_torch_platform(gpu_infos) == expected
-    if py_version < (3, 9):
-        captured = capsys.readouterr()
-        assert "Support for Python 3.8 was dropped in torch 2.5" in captured.out
 
 
-def test_nvidia_gpu_linux(monkeypatch, capsys):
+def test_nvidia_gpu_linux(monkeypatch):
     monkeypatch.setattr("torchruntime.platform_detection.os_name", "Linux")
     monkeypatch.setattr("torchruntime.platform_detection.arch", "x86_64")
     gpu_infos = [GPU(NVIDIA, "NVIDIA", 0x1234, "GeForce", True)]
     expected = "cu124" if py_version < (3, 9) else "cu128"
     assert get_torch_platform(gpu_infos) == expected
-    if py_version < (3, 9):
-        captured = capsys.readouterr()
-        assert "Support for Python 3.8 was dropped in torch 2.5" in captured.out
 
 
 def test_nvidia_gpu_mac(monkeypatch):
@@ -124,6 +118,52 @@ def test_nvidia_gpu_mac(monkeypatch):
         get_torch_platform(gpu_infos)
 
 
+def test_nvidia_7xx_gpu_windows(monkeypatch):
+    monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
+    monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")
+    gpu_infos = [GPU(NVIDIA, "NVIDIA", "1004", "GK110 [GeForce GTX 780]", True)]
+    assert get_torch_platform(gpu_infos) == "cu118"
+
+
+def test_nvidia_10xx_gpu_windows(monkeypatch):
+    monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
+    monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")
+    gpu_infos = [GPU(NVIDIA, "NVIDIA", "1c02", "GP106 [GeForce GTX 1060 3GB]", True)]
+    assert get_torch_platform(gpu_infos) == "cu124"
+
+
+def test_nvidia_16xx_gpu_windows(monkeypatch):
+    monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
+    monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")
+    gpu_infos = [GPU(NVIDIA, "NVIDIA", "21c4", "TU116 [GeForce GTX 1660 SUPER]", True)]
+    expected = "cu124" if py_version < (3, 9) else "cu128"
+    assert get_torch_platform(gpu_infos) == expected
+
+
+def test_nvidia_20xx_gpu_windows(monkeypatch):
+    monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
+    monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")
+    gpu_infos = [GPU(NVIDIA, "NVIDIA", "1f11", "TU106M [GeForce RTX 2060 Mobile]", True)]
+    expected = "cu124" if py_version < (3, 9) else "cu128"
+    assert get_torch_platform(gpu_infos) == expected
+
+
+def test_nvidia_30xx_gpu_windows(monkeypatch):
+    monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
+    monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")
+    gpu_infos = [GPU(NVIDIA, "NVIDIA", "2489", "GA104 [GeForce RTX 3060 Ti Lite Hash Rate]", True)]
+    expected = "cu124" if py_version < (3, 9) else "cu128"
+    assert get_torch_platform(gpu_infos) == expected
+
+
+def test_nvidia_40xx_gpu_windows(monkeypatch):
+    monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
+    monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")
+    gpu_infos = [GPU(NVIDIA, "NVIDIA", "2705", "AD103 [GeForce RTX 4070 Ti SUPER]", True)]
+    expected = "cu124" if py_version < (3, 9) else "cu128"
+    assert get_torch_platform(gpu_infos) == expected
+
+
 def test_nvidia_5xxx_gpu_windows(monkeypatch):
     monkeypatch.setattr("torchruntime.platform_detection.os_name", "Windows")
     monkeypatch.setattr("torchruntime.platform_detection.arch", "amd64")

torchruntime/platform_detection.py

Lines changed: 38 additions & 10 deletions
@@ -9,7 +9,26 @@
 arch = platform.machine().lower()
 py_version = sys.version_info
 
-BLACKWELL_DEVICES = re.compile(r"\b(?:5060|5070|5080|5090)\b")
+# https://www.techpowerup.com/gpu-specs/?architecture=Kepler&sort=generation and so on (change the arch field)
+KEPLER_DEVICES = re.compile(r"\b(gk1\d{2}\w*)\b", re.IGNORECASE)  # sm3.7
+MAXWELL_DEVICES = re.compile(r"\b(gm10\d\w*)\b", re.IGNORECASE)  # sm5
+PASCAL_DEVICES = re.compile(r"\b(gp10\d\w*)\b", re.IGNORECASE)  # sm6
+VOLTA_DEVICES = re.compile(r"\b(gv100\w*)\b", re.IGNORECASE)  # sm7
+TURING_DEVICES = re.compile(r"\b(tu1\d{2}\w*)\b", re.IGNORECASE)  # sm7.5
+AMPERE_DEVICES = re.compile(r"\b(ga10\d\w*)\b", re.IGNORECASE)  # sm8.6
+ADA_LOVELACE_DEVICES = re.compile(r"\b(ad10\d\w*)\b", re.IGNORECASE)  # sm8.9
+BLACKWELL_DEVICES = re.compile(r"\b(?:5060|5070|5080|5090)\b", re.IGNORECASE)  # sm10, sm12
+
+NVIDIA_ARCH_MAP = {
+    BLACKWELL_DEVICES: 12,
+    ADA_LOVELACE_DEVICES: 8.9,
+    AMPERE_DEVICES: 8.6,
+    TURING_DEVICES: 7.5,
+    VOLTA_DEVICES: 7,
+    PASCAL_DEVICES: 6,
+    MAXWELL_DEVICES: 5,
+    KEPLER_DEVICES: 3.7,
+}
 
 
 def get_torch_platform(gpu_infos):
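As a sanity check of the patterns above (illustrative only, assuming these names are in scope; the actual lookup is the `get_nvidia_arch()` helper added later in this diff), the chip codename embedded in each PCI device name is what gets matched:

```python
# Device names as they appear in the PCI database, e.g. "GA104 [GeForce RTX 3060 Ti ...]"
print(bool(AMPERE_DEVICES.search("GA104 [GeForce RTX 3060 Ti Lite Hash Rate]")))  # True
print(bool(KEPLER_DEVICES.search("GK110 [GeForce GTX 780]")))                     # True (sm3.7)

# NVIDIA_ARCH_MAP is ordered newest-first, so the first matching pattern decides the arch
for pattern, arch in NVIDIA_ARCH_MAP.items():
    if pattern.search("TU116 [GeForce GTX 1660 SUPER]"):
        print(arch)  # 7.5 (Turing)
        break
```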
@@ -109,16 +128,17 @@ def _get_platform_for_discrete(gpu_infos):
         return "mps"
     elif vendor_id == NVIDIA:
         if os_name in ("Windows", "Linux"):
-            if py_version < (3, 9):
-                device_names = set(gpu.device_name for gpu in gpu_infos)
-                if any(BLACKWELL_DEVICES.search(device_name) for device_name in device_names):
-                    raise NotImplementedError(
-                        f"Torch does not support NVIDIA 50xx series of GPUs on Python 3.8. Please switch to a newer Python version to use the latest version of torch!"
-                    )
-
-                print(
-                    "[WARNING] Support for Python 3.8 was dropped in torch 2.5. torchruntime will default to using torch 2.4 instead, but consider switching to a newer Python version to use the latest version of torch!"
+            device_names = set(gpu.device_name for gpu in gpu_infos)
+            arch_version = get_nvidia_arch(device_names)
+            if py_version < (3, 9) and arch_version == 12:
+                raise NotImplementedError(
+                    f"Torch does not support NVIDIA 50xx series of GPUs on Python 3.8. Please switch to a newer Python version to use the latest version of torch!"
                 )
+
+            # https://github.com/pytorch/pytorch/blob/0b6ea0b959f65d53ea8a34c1fa1c46446dfe3603/.ci/manywheel/build_cuda.sh#L54
+            if arch_version == 3.7:
+                return "cu118"
+            if (arch_version > 3.7 and arch_version < 7.5) or py_version < (3, 9):
                 return "cu124"
 
         return "cu128"
@@ -151,6 +171,14 @@ def _get_platform_for_discrete(gpu_infos):
     return "cpu"
 
 
+def get_nvidia_arch(device_names):
+    for arch_regex, arch in NVIDIA_ARCH_MAP.items():
+        if any(arch_regex.search(device_name) for device_name in device_names):
+            return arch
+
+    return 0
+
+
 def _get_platform_for_integrated(gpu_infos):
     gpu = gpu_infos[0]
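Putting the two hunks together, the new lookup can be exercised directly; a minimal sketch, assuming torchruntime with this commit applied:

```python
from torchruntime.platform_detection import get_nvidia_arch

# get_nvidia_arch() scans device names against NVIDIA_ARCH_MAP and returns the SM version,
# which _get_platform_for_discrete() then maps to a CUDA build tag.
print(get_nvidia_arch({"GK110 [GeForce GTX 780]"}))         # 3.7 -> cu118
print(get_nvidia_arch({"GP106 [GeForce GTX 1060 3GB]"}))    # 6   -> cu124
print(get_nvidia_arch({"TU116 [GeForce GTX 1660 SUPER]"}))  # 7.5 -> cu128 (cu124 on Python 3.8)
print(get_nvidia_arch({"Unrecognized device"}))             # 0   -> cu128 (cu124 on Python 3.8)
```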
