
Commit 9ec53ce

Fixed pre-CI error
Signed-off-by: Cheng, Penghui <[email protected]>
PenghuiCheng committed Mar 17, 2024
1 parent 3d28d4a commit 9ec53ce
Showing 4 changed files with 45 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_inc.yml
@@ -32,7 +32,7 @@ jobs:
          python -m pip install --upgrade pip
          pip install cmake>=3.16
          pip install py-cpuinfo
-         pip install torch==2.1.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu
+         pip install torch==2.1.0 torchaudio==2.1.0 torchvision==0.16 --extra-index-url https://download.pytorch.org/whl/cpu
          pip install .[neural-compressor,diffusers,tests]
          pip install intel-extension-for-pytorch==2.1.100
      - name: Test with Pytest
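The change above pins torchaudio and torchvision together with torch; without the pins, pip is free to resolve a torchvision wheel built against a different torch release, which then fails at import time in CI. A minimal post-install sanity check, assuming the three packages were installed exactly as pinned (the version prefixes mirror the pins in the workflow):

    # Sanity check that the pinned CPU builds resolved consistently.
    # Assumes torch, torchaudio and torchvision are installed as pinned above.
    import torch
    import torchaudio
    import torchvision

    assert torch.__version__.startswith("2.1.0"), torch.__version__
    assert torchaudio.__version__.startswith("2.1.0"), torchaudio.__version__
    assert torchvision.__version__.startswith("0.16"), torchvision.__version__
    print("torch stack versions are mutually consistent")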
6 changes: 3 additions & 3 deletions optimum/intel/neural_compressor/quantization.py
@@ -287,17 +287,17 @@ def quantize(

         if not isinstance(quantization_config, PostTrainingQuantConfig):
             if use_cpu:
-                # will remove after intel-extension-for-transformers 1.3.3 released
+                # will remove after intel-extension-for-transformers 1.3.3 release.
                 quantization_config.device = "cpu"
                 quantization_config.post_init()
             elif use_xpu:
-                # will remove after intel-extension-for-transformers 1.3.3 released
+                # will remove after intel-extension-for-transformers 1.3.3 release.
                 quantization_config.device = "xpu"
                 quantization_config.post_init_xpu()
             self._quantized_model = convert_to_quantized_model(
                 self._original_model, quantization_config, device=quantization_config.device
             )
-            # will remove after intel-extension-for-transformers 1.3.3 released
+            # will remove after intel-extension-for-transformers 1.3.3 release.
             if hasattr(quantization_config, "calib_dataloader"):
                 quantization_config.calib_dataloader = None
             self._quantized_model.quantization_config = quantization_config
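The comments touched here all mark the same temporary workaround: when quantization_config is not a PostTrainingQuantConfig, quantize() takes the ITREX weight-only path, which sets the device, calls post_init() or post_init_xpu(), converts the model with convert_to_quantized_model(), and drops calib_dataloader before attaching the config. A hedged sketch of a caller reaching this path; the WeightOnlyQuantConfig import location is an assumption based on ITREX 1.3:

    # Illustrative sketch of the weight-only branch patched above; the
    # WeightOnlyQuantConfig import path is an assumption (ITREX ~1.3).
    from transformers import AutoModelForCausalLM
    from optimum.intel import INCQuantizer
    from intel_extension_for_transformers.transformers import WeightOnlyQuantConfig

    model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-GPTNeoForCausalLM")
    quantizer = INCQuantizer.from_pretrained(model, task="text-generation")

    # Not a PostTrainingQuantConfig, so quantize() sets the device, runs
    # post_init(), and converts via convert_to_quantized_model().
    quantizer.quantize(
        quantization_config=WeightOnlyQuantConfig(weight_dtype="int8"),
        save_directory="quantized_model",
    )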
12 changes: 8 additions & 4 deletions setup.py
@@ -49,9 +49,6 @@
"rjieba",
"timm",
"invisible-watermark>=0.2.0",
# Will remove after intel-extension-for-transformers 1.3.3 released.
"intel-extension-for-transformers>=1.3",
"peft",
"auto-gptq",
"transformers_stream_generator",
"einops",
@@ -60,7 +57,14 @@
 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
 
 EXTRAS_REQUIRE = {
-    "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
+    "neural-compressor": [
+        "neural-compressor>=2.2.0",
+        "onnxruntime<1.15.0",
+        "accelerate",
+        # will remove after intel-extension-for-transformers 1.3.3 release.
+        "intel-extension-for-transformers>=1.3",
+        "peft",
+    ],
     "openvino": ["openvino>=2023.3", "nncf>=2.8.1"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
     "nncf": ["nncf>=2.8.1"],
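With this change, intel-extension-for-transformers and peft stop being unconditional test dependencies and install only with the neural-compressor extra, e.g. pip install .[neural-compressor]. A small illustrative check that the extra's pin was satisfied at runtime (assumes the packaging library is available):

    # Purely illustrative: verify the ITREX pin from the extra resolved.
    from importlib.metadata import version
    from packaging.version import Version

    assert Version(version("intel-extension-for-transformers")) >= Version("1.3")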
77 changes: 33 additions & 44 deletions tests/neural_compressor/test_optimization.py
@@ -88,6 +88,13 @@ class OptimizationTest(INCTestMixin):
"hf-internal-testing/tiny-random-GPTNeoForCausalLM",
)

WEIGHT_ONLY_CONFIG = (
(False, "RTN", "int4_clip"),
(False, "GPTQ", "int4_clip"),
(False, "RTN", "int8"),
(True, "", ""),
)

@parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls):
quantization_config = PostTrainingQuantConfig(approach="dynamic")
@@ -202,59 +209,41 @@ def test_ipex_static_quantization_with_smoothquant(self, task, model_name, expec
             load_ipex_model=True,
         )
 
+    @parameterized.expand(WEIGHT_ONLY_CONFIG)
     @unittest.skipIf(
         not is_intel_extension_for_transformers_available(), reason="Intel-extension-for-transformers not available!"
     )
-    def test_weight_only_quantization(self):
+    def test_weight_only_quantization(self, no_config, algo, weight_dtype):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
         model = AutoModelForCausalLM.from_pretrained(model_name)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+        quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
+        calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
-            q_model = quantizer.quantize(
-                quantization_config=quantization_config,
-                save_directory=tmp_dir,
-            )
-            q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
-            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
-            out = model(inp)[0]
-            q_out = q_model(inp)[0]
-            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            quantization_config = WeightOnlyQuantConfig(
-                algorithm="GPTQ",
-                algorithm_args={
-                    "percdamp": 0.01,
-                    "act_order": False,
-                    "scheme": "sym",
-                },
-                weight_dtype="int4_clip",
-            )
-            q_model = quantizer.quantize(
-                quantization_config=quantization_config,
-                calibration_dataset=calibration_dataset,
-                save_directory=tmp_dir,
-            )
-            q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
-            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
-            out = model(inp)[0]
-            q_out = q_model(inp)[0]
-            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            q_model = quantizer.quantize(
-                weight_only=True,  # use RTN quantization method and NF4 weight data type is default.
-                save_directory=tmp_dir,
-            )
+            if not no_config:
+                if algo == "GPTQ":
+                    algorithm_args = {
+                        "percdamp": 0.01,
+                        "act_order": False,
+                        "scheme": "sym",
+                    }
+                quantization_config = WeightOnlyQuantConfig(
+                    algorithm=algo,
+                    algorithm_args=algorithm_args if algo == "GPTQ" else None,
+                    weight_dtype=weight_dtype,
+                )
+                q_model = quantizer.quantize(
+                    quantization_config=quantization_config,
+                    calibration_dataset=calibration_dataset if algo == "GPTQ" else None,
+                    save_directory=tmp_dir,
+                )
+            else:
+                q_model = quantizer.quantize(
+                    weight_only=True,  # use RTN quantization method and NF4 weight data type is default.
+                    save_directory=tmp_dir,
+                )
             q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
             inp = torch.tensor([calibration_dataset[0]["input_ids"]])
             out = model(inp)[0]
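The rewrite collapses three near-identical with-blocks into one body driven by WEIGHT_ONLY_CONFIG, so parameterized.expand turns each (no_config, algo, weight_dtype) tuple into an independent test case that passes or fails on its own. A self-contained sketch of the same pattern; the Demo class and its assertions are illustrative only, not part of the commit:

    import unittest
    from parameterized import parameterized

    class Demo(unittest.TestCase):
        # Each tuple expands into its own test method, so failures are
        # reported per configuration instead of aborting a shared test.
        @parameterized.expand([
            (False, "RTN", "int4_clip"),
            (False, "GPTQ", "int4_clip"),
            (False, "RTN", "int8"),
            (True, "", ""),
        ])
        def test_case(self, no_config, algo, weight_dtype):
            if no_config:
                self.assertEqual((algo, weight_dtype), ("", ""))
            else:
                self.assertIn(weight_dtype, ("int4_clip", "int8"))

    if __name__ == "__main__":
        unittest.main()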
