From fb2357a8df23568dde5defd576d6617914835cfe Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 12 Dec 2024 15:13:48 +0100 Subject: [PATCH] Do not export in auto if no compression will be applied --- optimum/intel/openvino/modeling_base.py | 3 ++- optimum/intel/openvino/modeling_base_seq2seq.py | 3 ++- optimum/intel/openvino/modeling_decoder.py | 2 +- optimum/intel/openvino/modeling_open_clip.py | 6 ++++-- optimum/intel/openvino/modeling_visual_language.py | 3 ++- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index b32ee4908b..2bbb46c126 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -592,7 +592,8 @@ def _from_transformers( if load_in_8bit is None and not quantization_config: ov_config = None else: - ov_config = OVConfig(dtype="auto") + # Export in fp32 if compression won't be applied later + ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto") main_export( model_name_or_path=model_id, diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index 69c4bfab10..73ddc6fcbc 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -364,7 +364,8 @@ def _from_transformers( if load_in_8bit is None and not quantization_config: ov_config = None else: - ov_config = OVConfig(dtype="auto") + # Export in fp32 if compression won't be applied later + ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto") main_export( model_name_or_path=model_id, diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 4897db1459..8953df693a 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -299,7 +299,7 @@ def _from_transformers( if load_in_8bit is None and not quantization_config: ov_export_config = None else: - ov_export_config = OVConfig(dtype="auto") + ov_export_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto") stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache) diff --git a/optimum/intel/openvino/modeling_open_clip.py b/optimum/intel/openvino/modeling_open_clip.py index 0df51cfd69..54296aba48 100644 --- a/optimum/intel/openvino/modeling_open_clip.py +++ b/optimum/intel/openvino/modeling_open_clip.py @@ -247,7 +247,8 @@ def _from_transformers( if load_in_8bit is None and not quantization_config: ov_config = None else: - ov_config = OVConfig(dtype="auto") + # Export in fp32 if compression won't be applied later + ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto") def fn_get_submodels(model): return {"model_text": model.text} @@ -372,7 +373,8 @@ def _from_transformers( if load_in_8bit is None and not quantization_config: ov_config = None else: - ov_config = OVConfig(dtype="auto") + # Export in fp32 if compression won't be applied later + ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto") def fn_get_submodels(model): return {"model_vision": model.visual} diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py index 02ceab177d..a7c2210082 100644 --- a/optimum/intel/openvino/modeling_visual_language.py +++ b/optimum/intel/openvino/modeling_visual_language.py @@ -598,7 +598,8 @@ def _from_transformers( if load_in_8bit is None and not quantization_config: ov_config = None else: - ov_config = OVConfig(dtype="auto") + # Export in fp32 if compression won't be applied later + ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto") stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)