diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 736b5234a9..1d3ab14907 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -238,9 +238,9 @@ def main_export( if load_in_8bit is None: if model_kwargs is None: model_kwargs = {} - + if model.num_parameters() >= _MAX_UNCOMPRESSED_DECODER_SIZE: - model_kwargs["load_in_8bit"] = True + model_kwargs["load_in_8bit"] = True else: model_kwargs["load_in_8bit"] = False diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 24af408f78..b1597e60d2 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -19,9 +19,8 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union -from transformers.utils import is_tf_available, is_torch_available - import nncf +from transformers.utils import is_tf_available, is_torch_available from openvino.runtime import PartialShape, save_model from openvino.runtime.utils.types import get_element_type @@ -199,7 +198,7 @@ def export_pytorch_via_onnx( ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, compress_to_fp16=False, - load_in_8bit=model_kwargs.get("load_in_8bit", False) + load_in_8bit=model_kwargs.get("load_in_8bit", False), ) return input_names, output_names, True @@ -323,14 +322,11 @@ def ts_patched_forward(*args, **kwargs): dims = inputs[input_name] for dim in dims: - static_shape[dim] = -1 + static_shape[dim] = -1 inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() - _save_model(ov_model, - output, - compress_to_fp16=False, - load_in_8bit=model_kwargs.get("load_in_8bit", False)) + _save_model(ov_model, output, compress_to_fp16=False, load_in_8bit=model_kwargs.get("load_in_8bit", False)) clear_class_registry() del model gc.collect() diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index db3bc21108..91a2c7ddc2 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -86,6 +86,7 @@ "pegasus", } + @add_start_docstrings( """ Base OVBaseDecoderModel class. @@ -225,7 +226,6 @@ def _from_transformers( if use_cache: task = task + "-with-past" - main_export( model_name_or_path=model_id, output=save_dir_path,