From d878453215c0eff3c4bf83cbd473833937ceec70 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 15 Dec 2023 13:16:33 +0400 Subject: [PATCH] Added FP32 option for weights data type --- optimum/commands/export/openvino.py | 11 +++++------ optimum/exporters/openvino/__main__.py | 4 ++-- optimum/exporters/openvino/convert.py | 2 +- setup.py | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py index c63ba7887a..25626e20aa 100644 --- a/optimum/commands/export/openvino.py +++ b/optimum/commands/export/openvino.py @@ -69,20 +69,19 @@ def parse_args_openvino(parser: "ArgumentParser"): ), ) optional_group.add_argument( - "-c", - "--compress-weights", + "--weight-format", type=str, - choices=["f16", "i8", "i4_sym_g128", "i4_asym_g128", "i4_sym_g64", "i4_asym_g64"], + choices=["f32", "f16", "i8", "i4_sym_g128", "i4_asym_g128", "i4_sym_g64", "i4_asym_g64"], default=None, help=( - "The weight compression option, e.g. f16 stands for float16 weights, i8 - INT8 weights, i4_* - for INT4 compressed weights." + "The weight format of the exporting model, e.g. f32 stands for float32 weights, f16 - for float16 weights, i8 - INT8 weights, i4_* - for INT4 compressed weights." 
), ) optional_group.add_argument( "--ratio", type=float, default=0.8, - help="Compression ratio between primary and backup precision (only relevant to INT4).", + help="Compression ratio between primary and backup precision (only applicable to INT4 type).", ) @@ -118,7 +117,7 @@ def run(self): cache_dir=self.args.cache_dir, trust_remote_code=self.args.trust_remote_code, pad_token_id=self.args.pad_token_id, - compression_option=self.args.compress_weights, + compression_option=self.args.weight_format, compression_ratio=self.args.ratio # **input_shapes, ) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 1ab2ed4b00..30f0c17038 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -121,7 +121,7 @@ def main_export( especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success. compression_option (`Optional[str]`, defaults to `None`): The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point, - `i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point. + `i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point, `f32` - means no compression. compression_ratio (`Optional[float]`, defaults to `None`): Compression ratio between primary and backup precision (only relevant to INT4). 
**kwargs_shapes (`Dict`): @@ -134,7 +134,7 @@ def main_export( >>> main_export("gpt2", output="gpt2_onnx/") ``` """ - if compression_option is not None and compression_option != "f16" and not is_nncf_available(): + if compression_option is not None and compression_option != "f16" and compression_option != "f32" and not is_nncf_available(): raise ImportError( f"Compression of the weights to {compression_option} requires nncf, please install it with `pip install nncf`" ) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 18885bfa22..449e1264b8 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -55,7 +55,7 @@ def _save_model(model, path: str, compression_option: Optional[str] = None, compression_ratio: Optional[float] = None): - if compression_option is not None and compression_option != "f16": + if compression_option is not None and compression_option != "f16" and compression_option != "f32": if not is_nncf_available(): raise ImportError( "Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`" diff --git a/setup.py b/setup.py index 14668bb655..2863a620b5 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ "onnxruntime<1.15.0", "transformers>=4.33.0", ], - "openvino": ["openvino-nightly", "onnx", "onnxruntime", "transformers>=4.33.0"], + "openvino": ["openvino", "onnx", "onnxruntime", "transformers>=4.33.0"], "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers"],