Skip to content

Commit

Permalink
Fixed example and unit tests for weight-only quantization
Browse files Browse the repository at this point in the history
Signed-off-by: Cheng, Penghui <[email protected]>
  • Loading branch information
PenghuiCheng committed Mar 13, 2024
1 parent 5d90b52 commit e804df3
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
2 changes: 1 addition & 1 deletion optimum/intel/neural_compressor/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def quantize(
batch_size=batch_size,
remove_unused_columns=remove_unused_columns,
data_collator=data_collator,
use_label=False,
use_label=False if "GPTQ" in algo else True,
)
quantization_config.calib_dataloader = calibration_dataloader

Expand Down
5 changes: 4 additions & 1 deletion tests/neural_compressor/test_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ def test_weight_only_quantization(self):
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
q_model = quantizer.quantize(
quantization_config=quantization_config,
Expand All @@ -220,6 +220,7 @@ def test_weight_only_quantization(self):
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
quantization_config = WeightOnlyQuantConfig(
algorithm="GPTQ",
weight_dtype="int4_clip",
Expand All @@ -235,6 +236,7 @@ def test_weight_only_quantization(self):
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
quantization_config = WeightOnlyQuantConfig(
algorithm="AWQ",
weight_dtype="int4_clip",
Expand All @@ -250,6 +252,7 @@ def test_weight_only_quantization(self):
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
q_model = quantizer.quantize(
                weight_only=True,  # uses the RTN quantization method; NF4 weight data type is the default.
save_directory=tmp_dir,
Expand Down

0 comments on commit e804df3

Please sign in to comment.