Skip to content

Commit

Permalink
Added FP32 option for weights data type
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexKoff88 committed Dec 15, 2023
1 parent f9800b7 commit d878453
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 10 deletions.
11 changes: 5 additions & 6 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,19 @@ def parse_args_openvino(parser: "ArgumentParser"):
),
)
optional_group.add_argument(
"-c",
"--compress-weights",
"--weight-format",
type=str,
choices=["f16", "i8", "i4_sym_g128", "i4_asym_g128", "i4_sym_g64", "i4_asym_g64"],
choices=["f32", "f16", "i8", "i4_sym_g128", "i4_asym_g128", "i4_sym_g64", "i4_asym_g64"],
default=None,
help=(
"The weight compression option, e.g. f16 stands for float16 weights, i8 - INT8 weights, i4_* - for INT4 compressed weights."
"The weight format of the exporting model, e.g. f32 stands for float32 weights, f16 - for float16 weights, i8 - INT8 weights, i4_* - for INT4 compressed weights."
),
)
optional_group.add_argument(
"--ratio",
type=float,
default=0.8,
help="Compression ratio between primary and backup precision (only relevant to INT4).",
help="Compression ratio between primary and backup precision (only applicable to INT4 type).",
)


Expand Down Expand Up @@ -118,7 +117,7 @@ def run(self):
cache_dir=self.args.cache_dir,
trust_remote_code=self.args.trust_remote_code,
pad_token_id=self.args.pad_token_id,
compression_option=self.args.compress_weights,
compression_option=self.args.weights_format,
compression_ratio=self.args.ratio
# **input_shapes,
)
4 changes: 2 additions & 2 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def main_export(
especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success.
compression_option (`Optional[str]`, defaults to `None`):
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `i4_sym_g128` - INT4 symmetric weights w/ group size 128, `i4_asym_g128` - as previous but asymmetric w/ zero-point,
`i4_sym_g64` - INT4 symmetric weights w/ group size 64, "i4_asym_g64" - as previous but asymmetric w/ zero-point.
`i4_sym_g64` - INT4 symmetric weights w/ group size 64, `i4_asym_g64` - as previous but asymmetric w/ zero-point, `f32` - float32 weights, i.e. no compression.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
**kwargs_shapes (`Dict`):
Expand All @@ -134,7 +134,7 @@ def main_export(
>>> main_export("gpt2", output="gpt2_onnx/")
```
"""
if compression_option is not None and compression_option != "f16" and not is_nncf_available():
if compression_option is not None and compression_option != "f16" and compression_option != "f32" and not is_nncf_available():
raise ImportError(
f"Compression of the weights to {compression_option} requires nncf, please install it with `pip install nncf`"
)
Expand Down
2 changes: 1 addition & 1 deletion optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@


def _save_model(model, path: str, compression_option: Optional[str] = None, compression_ratio: Optional[float] = None):
if compression_option is not None and compression_option != "f16":
if compression_option is not None and compression_option != "f16" and compression_option != "f32":
if not is_nncf_available():
raise ImportError(
"Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"onnxruntime<1.15.0",
"transformers>=4.33.0",
],
"openvino": ["openvino-nightly", "onnx", "onnxruntime", "transformers>=4.33.0"],
"openvino": ["openvino", "onnx", "onnxruntime", "transformers>=4.33.0"],
"nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"],
"ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"],
"diffusers": ["diffusers"],
Expand Down

0 comments on commit d878453

Please sign in to comment.