Added FP32 option for weights data type

huggingface · Dec 15, 2023 · d878453 · d878453
1 parent f9800b7
commit d878453
Show file tree

Hide file tree

Showing 4 changed files with 9 additions and 10 deletions.
diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
@@ -69,20 +69,19 @@ def parse_args_openvino(parser: "ArgumentParser"):
         ),
     )
     optional_group.add_argument(
-        "-c",
-        "--compress-weights",
+        "--weight-format",
         type=str,
-        choices=["f16", "i8", "i4_sym_g128", "i4_asym_g128", "i4_sym_g64", "i4_asym_g64"],
+        choices=["f32", "f16", "i8", "i4_sym_g128", "i4_asym_g128", "i4_sym_g64", "i4_asym_g64"],
         default=None,
         help=(
-            "The weight compression option, e.g. f16 stands for float16 weights, i8 - INT8 weights, i4_* - for INT4 compressed weights."
+            "The weight format of the exported model, e.g. f32 stands for float32 weights, f16 for float16 weights, i8 for INT8 weights, and i4_* for INT4 compressed weights."
         ),
     )
     optional_group.add_argument(
         "--ratio",
         type=float,
         default=0.8,
-        help="Compression ratio between primary and backup precision (only relevant to INT4).",
+        help="Compression ratio between primary and backup precision (only applicable to INT4 type).",
     )
 
 
@@ -118,7 +117,7 @@ def run(self):
             cache_dir=self.args.cache_dir,
             trust_remote_code=self.args.trust_remote_code,
             pad_token_id=self.args.pad_token_id,
-            compression_option=self.args.compress_weights,
+            compression_option=self.args.weight_format,
             compression_ratio=self.args.ratio
             # **input_shapes,
         )
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
@@ -121,7 +121,7 @@ def main_export(
             especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success.
         compression_option (`Optional[str]`, defaults to `None`):
             The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
-            `i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point.
+            `i4_sym_g64` - INT4 symmetric weights w/ group size 64, `i4_asym_g64` - as previous but asymmetric w/ zero-point, `f32` - float32 weights (no compression).
         compression_ratio (`Optional[float]`, defaults to `None`):
             Compression ratio between primary and backup precision (only relevant to INT4).
         **kwargs_shapes (`Dict`):
@@ -134,7 +134,7 @@ def main_export(
     >>> main_export("gpt2", output="gpt2_onnx/")
     ```
     """
-    if compression_option is not None and compression_option != "f16" and not is_nncf_available():
+    if compression_option is not None and compression_option != "f16" and compression_option != "f32" and not is_nncf_available():
         raise ImportError(
             f"Compression of the weights to {compression_option} requires nncf, please install it with `pip install nncf`"
         )

diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
@@ -55,7 +55,7 @@
 
 
 def _save_model(model, path: str, compression_option: Optional[str] = None, compression_ratio: Optional[float] = None):
-    if compression_option is not None and compression_option != "f16":
+    if compression_option is not None and compression_option != "f16" and compression_option != "f32":
         if not is_nncf_available():
             raise ImportError(
                 "Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"

diff --git a/setup.py b/setup.py
@@ -43,7 +43,7 @@
         "onnxruntime<1.15.0",
         "transformers>=4.33.0",
     ],
-    "openvino": ["openvino-nightly", "onnx", "onnxruntime", "transformers>=4.33.0"],
+    "openvino": ["openvino", "onnx", "onnxruntime", "transformers>=4.33.0"],
     "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"],
     "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"],
     "diffusers": ["diffusers"],