diff --git a/hqq/core/quantize.py b/hqq/core/quantize.py
index 8845c9c..80aaae7 100755
--- a/hqq/core/quantize.py
+++ b/hqq/core/quantize.py
@@ -1002,11 +1002,11 @@ def forward_aten_int8(self, x: Tensor) -> Tensor:
 def hqq_base_quant_config(
     nbits: int = 4,
     group_size: int = 64,
-    quant_zero: bool = True,
+    quant_zero: bool = False,
     quant_scale: bool = False,
     offload_meta: bool = False,  # meta-data should be quantized with the same settings to use offload_meta
     view_as_float: bool = False,
-    axis: int = 0,
+    axis: int = 1,
 ):
     assert (
         nbits in Quantizer.SUPPORTED_BITS
@@ -1025,6 +1025,12 @@ def hqq_base_quant_config(
         "view_as_float": view_as_float,
     }
 
+    if(quant_zero or quant_scale):
+        print(colored('Warning: Quantized meta-data is deprecated and will be removed. It is not supported for quantized model serialization.', 'yellow'))
+
+    if(offload_meta):
+        print(colored('Warning: Meta-data offloading is deprecated and will be removed. It is not supported for quantized model serialization.', 'yellow'))
+
     if offload_meta:
         if quant_scale != quant_zero:
             # print(colored("quant_zero and quant_scale must be the same when offload_meta is set to True. Setting quant_scale=quant_zero." , 'yellow'))