diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 8152b92d29..3baa9119a1 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -235,19 +235,17 @@ def main_export(
     onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task)
     onnx_config = onnx_config_constructor(model.config)
     models_and_onnx_configs = {"model": (model, onnx_config)}
-    if model_kwargs is None:
-        model_kwargs = {}
+    model_kwargs = model_kwargs or {}
     load_in_8bit = model_kwargs.get("load_in_8bit", None)
     if load_in_8bit is None:
         if model.num_parameters() >= _MAX_UNCOMPRESSED_DECODER_SIZE:
-            model_kwargs["load_in_8bit"] = True
-        else:
-            model_kwargs["load_in_8bit"] = False
-    else:
-        if not is_nncf_available():
-            raise ImportError(
-                "Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
-            )
+            if not is_nncf_available():
+                logger.warning(
+                    "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf. "
+                    "Please install it with `pip install nncf`"
+                )
+            else:
+                model_kwargs["load_in_8bit"] = True
 
 if not is_stable_diffusion:
     needs_pad_token_id = (
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index b29efe253e..ab4a41e873 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -56,10 +56,10 @@ def _save_model(model, path: str, compress_to_fp16=False, load_in_8bit=False):
     if load_in_8bit:
         if not is_nncf_available():
-            logger.warning(
-                "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf."
-                "please install it with `pip install nncf`"
+            raise ImportError(
+                "Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
             )
+
         import nncf
 
         model = nncf.compress_weights(model)
 
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 55758b6683..6563eed7d8 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -150,6 +150,8 @@ class OVWeightCompressionTest(unittest.TestCase):
         (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 45, 22),
     )
 
+    SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = ((OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 22),)
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS)
     def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         task = model_cls.export_feature
@@ -197,6 +199,18 @@ def test_ovmodel_weight_compression(self, model_cls, model_name, expected_pt_int
         outputs = model(**tokens)
         self.assertTrue("logits" in outputs)
 
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
+    def test_ovmodel_load_with_compressed_weights(self, model_cls, model_name, expected_ov_int8):
+        model = model_cls.from_pretrained(model_name, export=True, load_in_8bit=True)
+        _, num_int8 = get_num_quantized_nodes(model)
+        self.assertEqual(expected_ov_int8, num_int8)
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
+    def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_name, expected_ov_int8):
+        model = model_cls.from_pretrained(model_name, export=True, load_in_8bit=False)
+        _, num_int8 = get_num_quantized_nodes(model)
+        self.assertEqual(0, num_int8)
+
 
 class OVQuantizerQATest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering",),)
diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 6699687c69..91defbefbb 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -715,7 +715,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         compression_metrics=["compression_loss"],
     ),
     "structured_movement_sparsity": OVTrainerTestDescriptor(
@@ -734,7 +734,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
     ),
@@ -742,7 +742,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
     ),
@@ -751,7 +751,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
@@ -760,7 +760,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
         expected_fake_quantize=48,
-        expected_int8=31,
+        expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),