Skip to content

Commit

Permalink
Changed the logic of default 8-bit weights compression (#445)
Browse files Browse the repository at this point in the history
* Added 8bit compression for decoders larger than 1B

* Style

* Fixed issue

* Fixed one more issue

* Added warning for nncf absence in case of default compression to 8 bits

* Fixed an issue. Added warning message when NNCF is not available

* Revised logic of the default INT8 export

* Added tests for auto weights compression

* Updated references
  • Loading branch information
AlexKoff88 authored Oct 4, 2023
1 parent d207110 commit d761009
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 18 deletions.
18 changes: 8 additions & 10 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,19 +235,17 @@ def main_export(
onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task)
onnx_config = onnx_config_constructor(model.config)
models_and_onnx_configs = {"model": (model, onnx_config)}
if model_kwargs is None:
model_kwargs = {}
model_kwargs = model_kwargs or {}
load_in_8bit = model_kwargs.get("load_in_8bit", None)
if load_in_8bit is None:
if model.num_parameters() >= _MAX_UNCOMPRESSED_DECODER_SIZE:
model_kwargs["load_in_8bit"] = True
else:
model_kwargs["load_in_8bit"] = False
else:
if not is_nncf_available():
raise ImportError(
"Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
)
if not is_nncf_available():
logger.warning(
"The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf."
"please install it with `pip install nncf`"
)
else:
model_kwargs["load_in_8bit"] = True

if not is_stable_diffusion:
needs_pad_token_id = (
Expand Down
6 changes: 3 additions & 3 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@
def _save_model(model, path: str, compress_to_fp16=False, load_in_8bit=False):
if load_in_8bit:
if not is_nncf_available():
logger.warning(
"The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf."
"please install it with `pip install nncf`"
raise ImportError(
"Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
)

import nncf

model = nncf.compress_weights(model)
Expand Down
14 changes: 14 additions & 0 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ class OVWeightCompressionTest(unittest.TestCase):
(OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 45, 22),
)

UPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = ((OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 22),)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS)
def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
task = model_cls.export_feature
Expand Down Expand Up @@ -197,6 +199,18 @@ def test_ovmodel_weight_compression(self, model_cls, model_name, expected_pt_int
outputs = model(**tokens)
self.assertTrue("logits" in outputs)

@parameterized.expand(UPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
def test_ovmodel_load_with_compressed_weights(self, model_cls, model_name, expected_ov_int8):
model = model_cls.from_pretrained(model_name, export=True, load_in_8bit=True)
_, num_int8 = get_num_quantized_nodes(model)
self.assertEqual(expected_ov_int8, num_int8)

@parameterized.expand(UPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_name, expected_ov_int8):
model = model_cls.from_pretrained(model_name, export=True, load_in_8bit=False)
_, num_int8 = get_num_quantized_nodes(model)
self.assertEqual(0, num_int8)


class OVQuantizerQATest(unittest.TestCase):
SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering",),)
Expand Down
10 changes: 5 additions & 5 deletions tests/openvino/test_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2],
expected_fake_quantize=48,
expected_int8=31,
expected_int8=30,
compression_metrics=["compression_loss"],
),
"structured_movement_sparsity": OVTrainerTestDescriptor(
Expand All @@ -734,15 +734,15 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
expected_fake_quantize=48,
expected_int8=31,
expected_int8=30,
expected_binary_masks=48,
compression_metrics=["compression_loss"],
),
"quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
expected_fake_quantize=48,
expected_int8=31,
expected_int8=30,
expected_binary_masks=48,
compression_metrics=["compression_loss"],
),
Expand All @@ -751,7 +751,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
expected_fake_quantize=48,
expected_int8=31,
expected_int8=30,
expected_binary_masks=48,
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
),
Expand All @@ -760,7 +760,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
expected_fake_quantize=48,
expected_int8=31,
expected_int8=30,
expected_binary_masks=48,
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
),
Expand Down

0 comments on commit d761009

Please sign in to comment.