 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
 
 
-act_qspec_asym8u = QuantizationSpec(
+act_qspec_asym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
     qscheme=torch.per_tensor_affine,
     is_dynamic=False,
     observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
 )
 
-wgt_qspec_asym8u = QuantizationSpec(
+wgt_qspec_asym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
     qscheme=torch.per_tensor_affine,
     is_dynamic=False,
     observer_or_fake_quant_ctr=MinMaxObserver,
 )
 
-wgt_qspec_asym8s = QuantizationSpec(
+wgt_qspec_sym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
     qscheme=torch.per_tensor_symmetric,
     is_dynamic=False,
     observer_or_fake_quant_ctr=MinMaxObserver,
 )
 
 bias_qspec: Optional[QuantizationSpec] = None
 
-qconfig_A8uW8u = QuantizationConfig(
-    act_qspec_asym8u,
-    act_qspec_asym8u,
-    wgt_qspec_asym8u,
+qconfig_A8W8 = QuantizationConfig(
+    act_qspec_asym8s,
+    act_qspec_asym8s,
+    wgt_qspec_asym8s,
     None,
 )
 
-qconfig_A8uW8s = QuantizationConfig(
-    act_qspec_asym8u,
-    act_qspec_asym8u,
-    wgt_qspec_asym8s,
+qconfig_A8W8sym = QuantizationConfig(
+    act_qspec_asym8s,
+    act_qspec_asym8s,
+    wgt_qspec_sym8s,
     None,
 )
 
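Note on the hunk above: all of these specs are already signed int8 (dtype=torch.int8, range -128..127), so the rename drops the misleading "asym8u" suffixes rather than changing any numerics. The four positional arguments to QuantizationConfig are the input-activation, output-activation, weight, and bias specs; as a minimal sketch (assuming the dataclass exposes the XNNPACK-style field names input_activation, output_activation, weight, and bias, which is not shown in this diff), qconfig_A8W8sym could equivalently be written with keywords:

# Hedged sketch, not part of the diff: keyword names are assumed to follow the
# XNNPACK-style QuantizationConfig dataclass.
qconfig_A8W8sym = QuantizationConfig(
    input_activation=act_qspec_asym8s,   # asymmetric signed int8 activations
    output_activation=act_qspec_asym8s,
    weight=wgt_qspec_sym8s,              # symmetric signed int8 weights
    bias=None,                           # bias left unquantized here
)

Written this way it is easier to see that qconfig_A8W8 and qconfig_A8W8sym differ only in the weight spec: asymmetric versus symmetric int8 weights.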
@@ -189,15 +189,15 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
 
 def get_cadence_default_quantizers() -> List[Quantizer]:
     return [
-        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(BmmPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8uW8s),
-        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8uW8s),
-        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(LinearPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(BmmPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8W8sym),
+        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8W8sym),
+        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(LinearPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8W8),
+        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8W8),
     ]
 
 
@@ -244,6 +244,6 @@ class CadenceWakeWordQuantizer(CadenceQuantizer):
     def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
         if quantizers is None:
            quantizers = get_cadence_default_quantizers()
-        quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8uW8u))
-        quantizers.append(CadenceAtenQuantizer(CatPattern(), qconfig_A8uW8u))
+        quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8W8))
+        quantizers.append(CadenceAtenQuantizer(CatPattern(), qconfig_A8W8))
         super().__init__(quantizers)
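For completeness, a hedged usage sketch of how these quantizers are applied in the PT2E flow; the toy model, example inputs, and the import path are placeholders for illustration, not part of this diff:

import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# Module path assumed; adjust to wherever CadenceWakeWordQuantizer lives in your tree.
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceWakeWordQuantizer

# Toy stand-in for a real wake-word model.
model = torch.nn.Linear(16, 4).eval()
example_inputs = (torch.randn(1, 16),)

# The capture step varies by PyTorch release (export_for_training or
# capture_pre_autograd_graph are also used); torch.export is shown as one option.
exported = torch.export.export(model, example_inputs).module()

quantizer = CadenceWakeWordQuantizer()        # default quantizers plus Add/Cat patterns
prepared = prepare_pt2e(exported, quantizer)  # insert observers per qconfig_A8W8 / qconfig_A8W8sym
prepared(*example_inputs)                     # calibrate
quantized = convert_pt2e(prepared)            # fold observers into quantize/dequantize ops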