Update quant_primitives.py

iseeyuan · web-flow · commit 382af0da2c4c · 2024-09-04T17:58:41.000-07:00
diff --git a/torchao/quantization/quant_primitives.py b/torchao/quantization/quant_primitives.py
@@ -730,6 +730,10 @@ def _choose_qparams_affine(
         max_val_pos = max_val
 
     if mapping_type == MappingType.SYMMETRIC.name:
+        # Calculate smin and smax individually and choose the larger one. For example, with quant_min = -8 and quant_max = 7:
+        # - If smin is larger, negative values are covered all the way down to -8, with less rounding error than the existing SYMMETRIC case.
+        # - If smax is larger, positive values are covered up to 7; the rounding error may be larger than in the existing SYMMETRIC case.
+        # Either way, no fp values are out of range after quantization.
         smin = min_val_neg / float(quant_min)
         smax = max_val_pos / float(quant_max)
         mask = smin > smax