Skip to content

Commit

Permalink
Do not export with dtype "auto" if no compression will be applied
Browse files Browse the repository at this point in the history
  • Loading branch information
nikita-savelyevv committed Dec 12, 2024
1 parent b547418 commit fb2357a
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 6 deletions.
3 changes: 2 additions & 1 deletion optimum/intel/openvino/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,8 @@ def _from_transformers(
if load_in_8bit is None and not quantization_config:
ov_config = None
else:
- ov_config = OVConfig(dtype="auto")
+ # Export in fp32 if compression won't be applied later
+ ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")

main_export(
model_name_or_path=model_id,
Expand Down
3 changes: 2 additions & 1 deletion optimum/intel/openvino/modeling_base_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,8 @@ def _from_transformers(
if load_in_8bit is None and not quantization_config:
ov_config = None
else:
- ov_config = OVConfig(dtype="auto")
+ # Export in fp32 if compression won't be applied later
+ ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")

main_export(
model_name_or_path=model_id,
Expand Down
2 changes: 1 addition & 1 deletion optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def _from_transformers(
if load_in_8bit is None and not quantization_config:
ov_export_config = None
else:
- ov_export_config = OVConfig(dtype="auto")
+ ov_export_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")

stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)

Expand Down
6 changes: 4 additions & 2 deletions optimum/intel/openvino/modeling_open_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ def _from_transformers(
if load_in_8bit is None and not quantization_config:
ov_config = None
else:
- ov_config = OVConfig(dtype="auto")
+ # Export in fp32 if compression won't be applied later
+ ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")

def fn_get_submodels(model):
return {"model_text": model.text}
Expand Down Expand Up @@ -372,7 +373,8 @@ def _from_transformers(
if load_in_8bit is None and not quantization_config:
ov_config = None
else:
- ov_config = OVConfig(dtype="auto")
+ # Export in fp32 if compression won't be applied later
+ ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")

def fn_get_submodels(model):
return {"model_vision": model.visual}
Expand Down
3 changes: 2 additions & 1 deletion optimum/intel/openvino/modeling_visual_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,8 @@ def _from_transformers(
if load_in_8bit is None and not quantization_config:
ov_config = None
else:
- ov_config = OVConfig(dtype="auto")
+ # Export in fp32 if compression won't be applied later
+ ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")

stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)

Expand Down

0 comments on commit fb2357a

Please sign in to comment.