Skip to content

Commit

Permalink
Added a fix for FP16 overflow issue on GPU/NPU (#994)
Browse files Browse the repository at this point in the history
* Added a fix for FP16 overflow issue on GPU/NPU

* Style

* Updated export test

* Style
  • Loading branch information
AlexKoff88 authored Nov 11, 2024
1 parent a8e69a3 commit b3cbc95
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 1 deletion.
42 changes: 41 additions & 1 deletion optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,31 @@
from optimum.intel.openvino.configuration import OVConfig


def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None, library_name: Optional[str] = None):
def _set_runtime_options(
models_and_export_configs: Dict[
str,
Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"], "OnnxConfig"],
],
task: str,
):
for model_name in models_and_export_configs.keys():
_, sub_export_config = models_and_export_configs[model_name]
if "vae_" in model_name or "text-generation" in task:
sub_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}


def _save_model(
    model,
    path: str,
    ov_config: Optional["OVConfig"] = None,
    library_name: Optional[str] = None,
    config: Optional["OnnxConfig"] = None,
):
    """
    Serialize an OpenVINO model to disk, embedding metadata in its rt_info.

    Args:
        model: The OpenVINO model to save.
        path (`str`): Destination path for the serialized model.
        ov_config (`Optional[OVConfig]`): Export configuration; when its dtype
            is ``"fp16"``, weights are compressed to FP16 on save.
        library_name (`Optional[str]`): Source library name, recorded as
            version info in the model's rt_info.
        config (`Optional[OnnxConfig]`): Export config; if it carries a
            ``runtime_options`` attribute, those options are copied into the
            model's rt_info so they travel with the model file.
    """
    # Compress weights to FP16 only when explicitly requested via the OV config.
    compress_to_fp16 = ov_config is not None and ov_config.dtype == "fp16"
    model = _add_version_info_to_model(model, library_name)

    # hasattr is False for config=None, so runtime options are strictly opt-in.
    if hasattr(config, "runtime_options"):
        model = _add_runtime_options_to_rt_info(model, config.runtime_options)
    save_model(model, path, compress_to_fp16)


Expand Down Expand Up @@ -213,6 +235,7 @@ def export_tensorflow(
output.parent / output,
ov_config=ov_config,
library_name=library_name,
config=config,
)
del ov_model
return input_names, output_names, True
Expand Down Expand Up @@ -276,6 +299,7 @@ def export_pytorch_via_onnx(
output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output,
ov_config=ov_config,
library_name=library_name,
config=config,
)
del ov_model
return input_names, output_names, True
Expand Down Expand Up @@ -450,6 +474,7 @@ def ts_patched_forward(*args, **kwargs):
output,
ov_config=ov_config,
library_name=library_name,
config=config,
)
clear_class_registry()
del ov_model
Expand Down Expand Up @@ -718,6 +743,8 @@ def export_from_model(

model.save_config(output)

_set_runtime_options(models_and_export_configs, task)

export_models(
models_and_export_configs=models_and_export_configs,
output_dir=output,
Expand Down Expand Up @@ -792,6 +819,19 @@ def export_tokenizer(
save_model(model, output / file_name.format(suffix))


def _add_runtime_options_to_rt_info(model: Model, options: Dict):
"""
Add runtime optinos
"""
try:
for name, value in options.items():
model.set_rt_info(value, ["runtime_options", name])
except Exception:
pass

return model


def _add_version_info_to_model(model: Model, library_name: Optional[str] = None):
"""
Add dependency versions to OpenVINO model
Expand Down
9 changes: 9 additions & 0 deletions tests/openvino/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,15 @@ def _openvino_export(
self.assertEqual(
ov_model.model.get_rt_info()["optimum"]["transformers_version"], _transformers_version
)
self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"]))

if library_name == "diffusers":
self.assertTrue(
ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
self.assertTrue(
ov_model.vae_decoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)

@parameterized.expand(SUPPORTED_ARCHITECTURES)
def test_export(self, model_type: str):
Expand Down

0 comments on commit b3cbc95

Please sign in to comment.