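Update BaseQuantizeConfig() default params for 0.2.0: quant_zero now defaults to False and axis to 1; print deprecation warnings when quantized meta-data (quant_zero/quant_scale) or offload_meta is requested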

mobiusml · Aug 28, 2024 · aba8ebe · aba8ebe
1 parent 33608a4
commit aba8ebe
Showing 1 changed file with 8 additions and 2 deletions.
diff --git a/hqq/core/quantize.py b/hqq/core/quantize.py
@@ -1002,11 +1002,11 @@ def forward_aten_int8(self, x: Tensor) -> Tensor:
 def hqq_base_quant_config(
     nbits: int = 4,
     group_size: int = 64,
-    quant_zero: bool = True,
+    quant_zero: bool = False,
     quant_scale: bool = False,
     offload_meta: bool = False,  # meta-data should be quantized with the same settings to use offload_meta
     view_as_float: bool = False,
-    axis: int = 0,
+    axis: int = 1,
 ):
     assert (
         nbits in Quantizer.SUPPORTED_BITS
@@ -1025,6 +1025,12 @@ def hqq_base_quant_config(
         "view_as_float": view_as_float,
     }
 
+    if(quant_zero or quant_scale):
+        print(colored('Warning: Quantized meta-data is deprecated and will be removed. It is not supported for quantized model serialization.', 'yellow'))
+
+    if(offload_meta):
+        print(colored('Warning: Meta-data offloading is deprecated and will be removed. It is not supported for quantized model serialization.', 'yellow'))
+
     if offload_meta:
         if quant_scale != quant_zero:
             # print(colored("quant_zero and quant_scale must be the same when offload_meta is set to True. Setting quant_scale=quant_zero." , 'yellow'))