Style
AlexKoff88 committed Nov 3, 2023
1 parent b083150 commit 320e94e
Showing 8 changed files with 67 additions and 51 deletions.
7 changes: 6 additions & 1 deletion optimum/commands/export/openvino.py
@@ -78,7 +78,12 @@ def parse_args_openvino(parser: "ArgumentParser"):
"The weight compression option, e.g. f16 stands for float16 weights, i8 - INT8 weights, i4_* - for INT4 compressed weights."
),
)
optional_group.add_argument("--ratio", type=float, default=0.8, help="Compression ratio between primary and backup precision (only relevant to INT4).")
optional_group.add_argument(
"--ratio",
type=float,
default=0.8,
help="Compression ratio between primary and backup precision (only relevant to INT4).",
)


class OVExportCommand(BaseOptimumCLICommand):
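Note on the new --ratio option: it sets the split between the primary INT4 precision and the INT8 backup precision during weight compression. A minimal sketch of the underlying NNCF call, assuming the CLI value is forwarded to the ratio argument of nncf.compress_weights (that forwarding is outside the hunks shown here):

    import nncf

    # ov_model: an openvino.runtime.Model produced by the export step
    compressed = nncf.compress_weights(
        ov_model,
        mode=nncf.CompressWeightsMode.INT4_SYM,  # primary precision, e.g. the "i4_sym_g128" option
        group_size=128,
        ratio=0.8,  # the --ratio default: roughly 80% of weights in INT4, the rest kept in INT8
    )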
2 changes: 1 addition & 1 deletion optimum/exporters/openvino/__main__.py
@@ -112,7 +112,7 @@ def main_export(
Experimental usage: Override the default submodels that are used at the export. This is
especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success.
compression_option (`Optional[str]`, defaults to `None`):
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
`i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
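For reference, the two parameters documented above are used together as in the call below; the sketch mirrors the pattern from tests/openvino/test_exporters_cli.py further down in this diff (checkpoint and output directory are placeholders):

    from optimum.exporters.openvino.__main__ import main_export

    main_export(
        model_name_or_path="hf-internal-testing/tiny-random-gpt2",  # placeholder checkpoint
        output="ov_model_dir",
        task="text-generation-with-past",
        compression_option="i4_sym_g128",  # INT4 symmetric weights, group size 128
        compression_ratio=0.8,  # 80% INT4 primary / 20% INT8 backup
    )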
30 changes: 19 additions & 11 deletions optimum/exporters/openvino/convert.py
@@ -61,9 +61,9 @@ def _save_model(model, path: str, compression_option: Optional[str] = None, comp
)

import nncf

COMPRESSION_OPTIONS = {
"i8": { "mode": nncf.CompressWeightsMode.INT8 },
"i8": {"mode": nncf.CompressWeightsMode.INT8},
"i4_sym_g128": {
"mode": nncf.CompressWeightsMode.INT4_SYM,
"group_size": 128,
@@ -86,7 +86,7 @@ def _save_model(model, path: str, compression_option: Optional[str] = None, comp
},
}
model = nncf.compress_weights(model, **COMPRESSION_OPTIONS[compression_option])

compress_to_fp16 = compression_option == "f16"
save_model(model, path, compress_to_fp16)
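Each entry of COMPRESSION_OPTIONS above is unpacked straight into nncf.compress_weights, so selecting, for example, compression_option="i4_asym_g64" is equivalent to the direct call below (illustrative only):

    model = nncf.compress_weights(model, mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=64)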

@@ -118,7 +118,7 @@ def export(
The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for
export on CUDA devices.
compression_option (`Optional[str]`, defaults to `None`):
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
`i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
@@ -174,7 +174,6 @@ def export_tensorflow(
output: Path,
compression_option: Optional[str] = None,
compression_ratio: Optional[float] = None,

):
"""
Export the TensorFlow model to OpenVINO format.
@@ -193,7 +192,9 @@ def export_tensorflow(
onnx_path = Path(output).with_suffix(".onnx")
input_names, output_names = export_tensorflow_onnx(model, config, opset, onnx_path)
ov_model = convert_model(str(onnx_path))
_save_model(ov_model, output.parent / output, compression_option=compression_option, compression_ratio=compression_ratio)
_save_model(
ov_model, output.parent / output, compression_option=compression_option, compression_ratio=compression_ratio
)
return input_names, output_names, True


@@ -228,7 +229,7 @@ def export_pytorch_via_onnx(
model_kwargs (optional[Dict[str, Any]], defaults to `None`):
Additional kwargs for model export.
compression_option (`Optional[str]`, defaults to `None`):
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
`i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
@@ -254,7 +255,7 @@ def export_pytorch_via_onnx(
ov_model,
output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output,
compression_option=compression_option,
compression_ratio=compression_ratio
compression_ratio=compression_ratio,
)
return input_names, output_names, True

@@ -364,8 +365,15 @@ def ts_patched_forward(*args, **kwargs):
except Exception as ex:
logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX")
return export_pytorch_via_onnx(
model, config, opset, output, device, input_shapes, model_kwargs, compression_option=compression_option,
compression_ratio=compression_ratio
model,
config,
opset,
output,
device,
input_shapes,
model_kwargs,
compression_option=compression_option,
compression_ratio=compression_ratio,
)
ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs}
ordered_input_names = list(inputs)
@@ -421,7 +429,7 @@ def export_models(
input_shapes (Optional[Dict], optional, Defaults to None):
If specified, allows to use specific shapes for the example input provided to the exporter.
compression_option (`Optional[str]`, defaults to `None`):
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
`i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point.
compression_ratio (`Optional[int]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
6 changes: 5 additions & 1 deletion optimum/intel/openvino/configuration.py
@@ -107,7 +107,11 @@ def _enable_standard_onnx_export_option(self):
# save_onnx_model is defaulted to false so that the final model output is
# in OpenVINO IR to realize performance benefit in OpenVINO runtime.
# True value of save_onnx_model will save a model in onnx format.
if isinstance(self.compression, dict) and "algorithm" in self.compression and self.compression["algorithm"] == "quantization":
if (
isinstance(self.compression, dict)
and "algorithm" in self.compression
and self.compression["algorithm"] == "quantization"
):
self.compression["export_to_onnx_standard_ops"] = self.save_onnx_model
elif isinstance(self.compression, list):
for i, algo_config in enumerate(self.compression):
23 changes: 6 additions & 17 deletions optimum/intel/openvino/quantization.py
@@ -50,24 +50,13 @@
OV_XML_FILE_NAME,
)


COMPRESSION_OPTIONS = {
"i8": { "mode": nncf.CompressWeightsMode.INT8 },
"i4_sym_g128": {
"mode": nncf.CompressWeightsMode.INT4_SYM,
"group_size": 128
},
"i4_asym_g128": {
"mode": nncf.CompressWeightsMode.INT4_ASYM,
"group_size": 128
},
"i4_sym_g64": {
"mode": nncf.CompressWeightsMode.INT4_SYM,
"group_size": 64
},
"i4_asym_g64": {
"mode": nncf.CompressWeightsMode.INT4_ASYM,
"group_size": 64
},
"i8": {"mode": nncf.CompressWeightsMode.INT8},
"i4_sym_g128": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128},
"i4_asym_g128": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128},
"i4_sym_g64": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64},
"i4_asym_g64": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64},
}

register_module(ignored_algorithms=[])(Conv1D)
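The COMPRESSION_OPTIONS table above backs the weights-only path of OVQuantizer; a rough usage sketch matching the call exercised in tests/openvino/test_quantization.py below (checkpoint, task string, and save directory are placeholders):

    from transformers import AutoModelForCausalLM
    from optimum.intel import OVConfig, OVQuantizer

    model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    quantizer = OVQuantizer.from_pretrained(model, task="text-generation")
    quantizer.quantize(
        save_directory="ov_int4_model",
        weights_only=True,
        quantization_config=OVConfig(compression={"type": "i4_sym_g128", "ratio": 0.8}),
    )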
31 changes: 21 additions & 10 deletions tests/openvino/test_exporters_cli.py
@@ -16,7 +16,12 @@
from tempfile import TemporaryDirectory

from parameterized import parameterized
from utils_tests import _ARCHITECTURES_TO_EXPECTED_INT8, MODEL_NAMES, get_num_quantized_nodes, _ARCHITECTURES_TO_EXPECTED_INT4_INT8
from utils_tests import (
_ARCHITECTURES_TO_EXPECTED_INT4_INT8,
_ARCHITECTURES_TO_EXPECTED_INT8,
MODEL_NAMES,
get_num_quantized_nodes,
)

from optimum.exporters.openvino.__main__ import main_export
from optimum.intel import ( # noqa
@@ -56,21 +61,27 @@ class OVCLIExportTestCase(unittest.TestCase):
("stable-diffusion-xl", "stable-diffusion-xl"),
("stable-diffusion-xl", "stable-diffusion-xl-refiner"),
)

SUPPORTED_4BIT_ARCHITECTURES = (
("text-generation-with-past", "opt125m"),
)


SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)

SUPPORTED_4BIT_OPTIONS = ["i4_sym_g128", "i4_asym_g128", "i4_sym_g64", "i4_asym_g64"]

TEST_4BIT_CONFIGURATONS = []
for arch in SUPPORTED_4BIT_ARCHITECTURES:
for option in SUPPORTED_4BIT_OPTIONS:
TEST_4BIT_CONFIGURATONS.append([arch[0], arch[1], option])

def _openvino_export(self, model_name: str, task: str, compression_option: str = None, compression_ratio: float = None):
def _openvino_export(
self, model_name: str, task: str, compression_option: str = None, compression_ratio: float = None
):
with TemporaryDirectory() as tmpdir:
main_export(model_name_or_path=model_name, output=tmpdir, task=task, compression_option=compression_option, compression_ratio=compression_ratio)
main_export(
model_name_or_path=model_name,
output=tmpdir,
task=task,
compression_option=compression_option,
compression_ratio=compression_ratio,
)

@parameterized.expand(SUPPORTED_ARCHITECTURES)
def test_export(self, task: str, model_type: str):
@@ -138,4 +149,4 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
expected_int8, expected_int4 = _ARCHITECTURES_TO_EXPECTED_INT4_INT8[model_type]
_, num_int8, num_int4 = get_num_quantized_nodes(model)
self.assertEqual(expected_int8, num_int8)
self.assertEqual(expected_int4, num_int4)
self.assertEqual(expected_int4, num_int4)
14 changes: 8 additions & 6 deletions tests/openvino/test_quantization.py
@@ -147,10 +147,8 @@ class OVWeightCompressionTest(unittest.TestCase):
(OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70, 70),
(OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 45, 44),
)

SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = (
(OVModelForCausalLM, "opt125m", 82, 323),
)

SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 82, 323),)

SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = (
(OVModelForCausalLM, "gpt2"),
@@ -211,7 +209,7 @@ def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_p
tokens = tokenizer("This is a sample input", return_tensors="pt")
outputs = model(**tokens)
self.assertTrue("logits" in outputs)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS)
def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_int8, expected_int4):
task = model_cls.export_feature
@@ -224,7 +222,11 @@ def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_i
tokenizer.pad_token = tokenizer.eos_token

quantizer = OVQuantizer.from_pretrained(transformers_model, task=task)
quantizer.quantize(save_directory=tmp_dir, weights_only=True, quantization_config=OVConfig(compression={"type": "i4_sym_g128", "ratio": 0.8}))
quantizer.quantize(
save_directory=tmp_dir,
weights_only=True,
quantization_config=OVConfig(compression={"type": "i4_sym_g128", "ratio": 0.8}),
)
model = model_cls.from_pretrained(tmp_dir)

_, num_int8, num_int4 = get_num_quantized_nodes(model)
5 changes: 1 addition & 4 deletions tests/openvino/utils_tests.py
@@ -113,9 +113,7 @@
}


_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {
"opt125m": (82, 323)
}
_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (82, 323)}


def get_num_quantized_nodes(ov_model):
@@ -131,4 +129,3 @@ def get_num_quantized_nodes(ov_model):
if "4" in elem.get_output_element_type(i).get_type_name():
num_int4 += 1
return num_fake_quantize, num_int8, num_int4
