Skip to content

Commit

Permalink
Fixed example and unit tests for weight-only quantization
Browse files Browse the repository at this point in the history
Signed-off-by: Cheng, Penghui <[email protected]>
  • Loading branch information
PenghuiCheng committed Mar 13, 2024
1 parent 5d90b52 commit e804df3
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
2 changes: 1 addition & 1 deletion optimum/intel/neural_compressor/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def quantize(
batch_size=batch_size,
remove_unused_columns=remove_unused_columns,
data_collator=data_collator,
use_label=False,
use_label=False if "GPTQ" in algo else True,
)
quantization_config.calib_dataloader = calibration_dataloader

Expand Down
5 changes: 4 additions & 1 deletion tests/neural_compressor/test_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ def test_weight_only_quantization(self):
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
q_model = quantizer.quantize(
quantization_config=quantization_config,
Expand All @@ -220,6 +220,7 @@ def test_weight_only_quantization(self):
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
quantization_config = WeightOnlyQuantConfig(
algorithm="GPTQ",
weight_dtype="int4_clip",
Expand All @@ -235,6 +236,7 @@ def test_weight_only_quantization(self):
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
quantization_config = WeightOnlyQuantConfig(
algorithm="AWQ",
weight_dtype="int4_clip",
Expand All @@ -250,6 +252,7 @@ def test_weight_only_quantization(self):
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))

with tempfile.TemporaryDirectory() as tmp_dir:
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
q_model = quantizer.quantize(
                weight_only=True,  # uses the RTN quantization method; NF4 weight data type is the default.
save_directory=tmp_dir,
Expand Down

0 comments on commit e804df3

Please sign in to comment.