Skip to content

Commit

Permalink
Small OpenVINO UX improvements (huggingface#629)
Browse files Browse the repository at this point in the history
* Update ov_config, change warning in .to() to debug

- set PERFORMANCE_HINT to LATENCY if not specified in ov_config
- replace the warning log about device types in .to() with a debug log (to avoid
  confusing users who create a pipeline(), which always triggered this warning)

* Set seq2seq ov_config in base model
  • Loading branch information
helena-intel authored Mar 22, 2024
1 parent 78b3d8c commit 77503fc
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 5 deletions.
2 changes: 1 addition & 1 deletion optimum/intel/openvino/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def to(self, device: str):
self._device = device.upper()
self.request = None
else:
logger.warning(f"device must be of type {str} but got {type(device)} instead")
logger.debug(f"device must be of type {str} but got {type(device)} instead")

return self

Expand Down
5 changes: 4 additions & 1 deletion optimum/intel/openvino/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ def __init__(
self.model_save_dir = model_save_dir
self._device = device.upper()
self.is_dynamic = dynamic_shapes
self.ov_config = ov_config if ov_config is not None else {"PERFORMANCE_HINT": "LATENCY"}
self.ov_config = ov_config if ov_config is not None else {}
if self.ov_config.get("PERFORMANCE_HINT") is None:
self.ov_config["PERFORMANCE_HINT"] = "LATENCY"

self.preprocessors = kwargs.get("preprocessors", [])
enable_compilation = kwargs.get("compile", True)

Expand Down
4 changes: 4 additions & 0 deletions optimum/intel/openvino/modeling_base_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ def __init__(
self._device = device.upper()
self.is_dynamic = dynamic_shapes
self.ov_config = ov_config if ov_config is not None else {}

if self.ov_config.get("PERFORMANCE_HINT") is None:
self.ov_config["PERFORMANCE_HINT"] = "LATENCY"

self.preprocessors = kwargs.get("preprocessors", [])

if self.is_dynamic:
Expand Down
4 changes: 3 additions & 1 deletion optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ def __init__(
self._device = device.upper()
self.is_dynamic = dynamic_shapes
self.ov_config = ov_config if ov_config is not None else {}
if self.ov_config.get("PERFORMANCE_HINT") is None:
self.ov_config["PERFORMANCE_HINT"] = "LATENCY"

# This attribute is needed to keep one reference on the temporary directory, since garbage collecting
# it would end up removing the directory containing the underlying OpenVINO model
Expand Down Expand Up @@ -456,7 +458,7 @@ def to(self, device: str):
self._device = device.upper()
self.clear_requests()
else:
logger.warning(f"device must be of type {str} but got {type(device)} instead")
logger.debug(f"device must be of type {str} but got {type(device)} instead")

return self

Expand Down
2 changes: 1 addition & 1 deletion optimum/intel/openvino/modeling_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def to(self, device: str):
self.decoder_with_past._device = self._device
self.clear_requests()
else:
logger.warning(f"device must be of type {str} but got {type(device)} instead")
logger.debug(f"device must be of type {str} but got {type(device)} instead")

return self

Expand Down
20 changes: 19 additions & 1 deletion tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ def test_load_from_hub_and_save_model(self):
tokens = tokenizer("This is a sample input", return_tensors="pt")
loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID)
self.assertIsInstance(loaded_model.config, PretrainedConfig)
# Test that PERFORMANCE_HINT is set to LATENCY by default
self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
self.assertEqual(loaded_model.request.get_property("PERFORMANCE_HINT"), "LATENCY")
loaded_model_outputs = loaded_model(**tokens)

# Test specifying ov_config with throughput hint and manual cache dir
Expand All @@ -134,7 +137,10 @@ def test_load_from_hub_and_save_model(self):
folder_contents = os.listdir(tmpdirname)
self.assertTrue(OV_XML_FILE_NAME in folder_contents)
self.assertTrue(OV_XML_FILE_NAME.replace(".xml", ".bin") in folder_contents)
model = OVModelForSequenceClassification.from_pretrained(tmpdirname)
model = OVModelForSequenceClassification.from_pretrained(tmpdirname, ov_config={"NUM_STREAMS": 2})
# Test that PERFORMANCE_HINT is set to LATENCY by default even with ov_config provided
self.assertEqual(model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
self.assertEqual(model.request.get_property("PERFORMANCE_HINT"), "LATENCY")

outputs = model(**tokens)
self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits))
Expand All @@ -150,6 +156,9 @@ def test_load_from_hub_and_save_decoder_model(self, use_cache):
tokens = tokenizer("This is a sample input", return_tensors="pt")
loaded_model = OVModelForCausalLM.from_pretrained(model_id, use_cache=use_cache)
self.assertIsInstance(loaded_model.config, PretrainedConfig)
# Test that PERFORMANCE_HINT is set to LATENCY by default
self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
self.assertEqual(loaded_model.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY")
loaded_model_outputs = loaded_model(**tokens)

with tempfile.TemporaryDirectory() as tmpdirname:
Expand All @@ -172,6 +181,11 @@ def test_load_from_hub_and_save_seq2seq_model(self):
loaded_model = OVModelForSeq2SeqLM.from_pretrained(self.OV_SEQ2SEQ_MODEL_ID, compile=False)
self.assertIsInstance(loaded_model.config, PretrainedConfig)
loaded_model.to("cpu")
loaded_model.compile()
# Test that PERFORMANCE_HINT is set to LATENCY by default
self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
self.assertEqual(loaded_model.decoder.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY")

loaded_model_outputs = loaded_model.generate(**tokens)

with tempfile.TemporaryDirectory() as tmpdirname:
Expand All @@ -192,6 +206,10 @@ def test_load_from_hub_and_save_seq2seq_model(self):
def test_load_from_hub_and_save_stable_diffusion_model(self):
loaded_pipeline = OVStableDiffusionPipeline.from_pretrained(self.OV_DIFFUSION_MODEL_ID, compile=False)
self.assertIsInstance(loaded_pipeline.config, Dict)
# Test that PERFORMANCE_HINT is set to LATENCY by default
self.assertEqual(loaded_pipeline.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
loaded_pipeline.compile()
self.assertEqual(loaded_pipeline.unet.request.get_property("PERFORMANCE_HINT"), "LATENCY")
batch_size, height, width = 2, 16, 16
np.random.seed(0)
inputs = {
Expand Down

0 comments on commit 77503fc

Please sign in to comment.