@@ -9,18 +9,19 @@
 
 from torchao.prototype.quantization.codebook import (
     CodebookQuantizedTensor,
+    CodebookWeightOnlyConfig,
     choose_qparams_codebook,
-    codebook_weight_only,
 )
 from torchao.quantization import quantize_
 from torchao.quantization.utils import compute_error
+from torchao.testing.utils import skip_if_no_cuda
 
 
 class TestCodebookQuantization(unittest.TestCase):
     def setUp(self):
         torch.manual_seed(123)
         self.input = torch.randn(100, 256, dtype=torch.float32)
-        self.block_size = (2, 2)
+        self.block_size = (1, 1)
         self.scale_block_size = 64
         self.code_dtype = torch.uint8
         self.chunk_size = 1024
@@ -71,16 +72,14 @@ def test_codebook_quantized_tensor_from_float2(self):
 
     def test_quantize_api(self):
         m = torch.nn.Sequential(torch.nn.Linear(64, 64))
-        quantize_(m, codebook_weight_only())
+        quantize_(m, CodebookWeightOnlyConfig())
         assert type(m[0].weight) == CodebookQuantizedTensor
 
+    @skip_if_no_cuda()
     def test_export(self):
-        m = torch.nn.Sequential(torch.nn.Linear(128, 64)).to(
-            dtype=torch.bfloat16, device="cuda"
-        )
-        quantize_(m, codebook_weight_only())
-        # quantize_(m, int4_weight_only(group_size=16))
-        example_inputs = (torch.randn(1, 128, dtype=torch.bfloat16, device="cuda"),)
+        m = torch.nn.Sequential(torch.nn.Linear(128, 64)).to(dtype=torch.bfloat16)
+        quantize_(m, CodebookWeightOnlyConfig())
+        example_inputs = (torch.randn(1, 128, dtype=torch.bfloat16),)
         print("m:", m)
         # torchao.utils.unwrap_tensor_subclass(m)
         m = torch.export.export_for_training(m, example_inputs).module()