Changed the logic of default 8-bit weights compression #445

Merged (10 commits, Oct 4, 2023)
18 changes: 8 additions & 10 deletions optimum/exporters/openvino/__main__.py
@@ -235,19 +235,17 @@ def main_export(
     onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task)
     onnx_config = onnx_config_constructor(model.config)
     models_and_onnx_configs = {"model": (model, onnx_config)}
-    if model_kwargs is None:
-        model_kwargs = {}
+    model_kwargs = model_kwargs or {}
     load_in_8bit = model_kwargs.get("load_in_8bit", None)
     if load_in_8bit is None:
         if model.num_parameters() >= _MAX_UNCOMPRESSED_DECODER_SIZE:
-            model_kwargs["load_in_8bit"] = True
-        else:
-            model_kwargs["load_in_8bit"] = False
-    else:
-        if not is_nncf_available():
-            raise ImportError(
-                "Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
-            )
+            if not is_nncf_available():
+                logger.warning(
+                    "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf."
+                    "please install it with `pip install nncf`"
+                )
+            else:
+                model_kwargs["load_in_8bit"] = True
 
     if not is_stable_diffusion:
         needs_pad_token_id = (
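For readers skimming the hunk: the default is now decided in one place. A minimal standalone sketch of that decision (the function name resolve_load_in_8bit is illustrative; is_nncf_available and _MAX_UNCOMPRESSED_DECODER_SIZE are the module's own helpers, abstracted here as plain arguments):

def resolve_load_in_8bit(model_kwargs, num_parameters, nncf_available, threshold):
    # Only the implicit case is decided here; an explicit load_in_8bit=True/False
    # passed by the user is left untouched.
    model_kwargs = model_kwargs or {}
    if model_kwargs.get("load_in_8bit") is None:
        if num_parameters >= threshold:
            if nncf_available:
                # Large decoder and nncf installed: compress weights by default.
                model_kwargs["load_in_8bit"] = True
            else:
                # nncf missing: fall back to an uncompressed export with a
                # warning, rather than requesting compression that cannot run.
                print("warning: exporting without int8 weight quantization")
    return model_kwargs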
6 changes: 3 additions & 3 deletions optimum/exporters/openvino/convert.py
@@ -56,10 +56,10 @@
 def _save_model(model, path: str, compress_to_fp16=False, load_in_8bit=False):
     if load_in_8bit:
         if not is_nncf_available():
-            logger.warning(
-                "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf."
-                "please install it with `pip install nncf`"
+            raise ImportError(
+                "Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
             )
 
         import nncf
 
         model = nncf.compress_weights(model)
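Note the division of labor after this change: the default path in main_export degrades to a warning, while an explicit load_in_8bit=True now fails fast here. The old code warned and then crashed anyway on the unconditional import nncf below it. A sketch of the caller-visible effect (the call site and ov_model are hypothetical; only the exception behavior comes from the diff):

try:
    _save_model(ov_model, "openvino_model.xml", load_in_8bit=True)
except ImportError as err:
    # Without nncf installed, the explicit request now surfaces as a clear
    # ImportError instead of a warning followed by a failed import.
    print(err)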
14 changes: 14 additions & 0 deletions tests/openvino/test_quantization.py
@@ -150,6 +150,8 @@ class OVWeightCompressionTest(unittest.TestCase):
         (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 45, 22),
     )
 
+    SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = ((OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 22),)
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS)
     def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         task = model_cls.export_feature
@@ -197,6 +199,18 @@ def test_ovmodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         outputs = model(**tokens)
         self.assertTrue("logits" in outputs)
 
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
+    def test_ovmodel_load_with_compressed_weights(self, model_cls, model_name, expected_ov_int8):
+        model = model_cls.from_pretrained(model_name, export=True, load_in_8bit=True)
+        _, num_int8 = get_num_quantized_nodes(model)
+        self.assertEqual(expected_ov_int8, num_int8)
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
+    def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_name, expected_ov_int8):
+        model = model_cls.from_pretrained(model_name, export=True, load_in_8bit=False)
+        _, num_int8 = get_num_quantized_nodes(model)
+        self.assertEqual(0, num_int8)
+
 
 class OVQuantizerQATest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering",),)
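The user-facing behavior these tests pin down looks roughly like this; a sketch reusing the tiny GPT-2 fixture from the test table (a real run needs optimum-intel with its OpenVINO extras, plus nncf for the compressed case):

from optimum.intel import OVModelForCausalLM

# Opt in: export with 8-bit weight compression (requires nncf).
compressed = OVModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt2", export=True, load_in_8bit=True
)

# Opt out: the exported graph should contain no int8 weight nodes.
uncompressed = OVModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt2", export=True, load_in_8bit=False
)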
10 changes: 5 additions & 5 deletions tests/openvino/test_training.py
@@ -715,7 +715,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         compression_metrics=["compression_loss"],
     ),
     "structured_movement_sparsity": OVTrainerTestDescriptor(

Collaborator (review comment on the expected_int8=30 line): why this change?
@@ -734,15 +734,15 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
     ),
     "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
     ),
@@ -751,7 +751,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
@@ -760,7 +760,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),