From 77503fcf2e9d29f1b7da4047967e7fa1e2db9e35 Mon Sep 17 00:00:00 2001 From: Helena Kloosterman Date: Fri, 22 Mar 2024 17:36:20 +0100 Subject: [PATCH] Small OpenVINO UX improvements (#629) * Update ov_config, change warning in .to() to debug - set PERFORMANCE_HINT to LATENCY if not specified in ov_config - replace warning log in .to() about devices with debug log (to prevent confusing users who create a pipeline(), which always shows this warning) * Set seq2seq ov_config in base model --- optimum/intel/openvino/modeling.py | 2 +- optimum/intel/openvino/modeling_base.py | 5 ++++- .../intel/openvino/modeling_base_seq2seq.py | 4 ++++ optimum/intel/openvino/modeling_diffusion.py | 4 +++- optimum/intel/openvino/modeling_seq2seq.py | 2 +- tests/openvino/test_modeling.py | 20 ++++++++++++++++++- 6 files changed, 32 insertions(+), 5 deletions(-) diff --git a/optimum/intel/openvino/modeling.py b/optimum/intel/openvino/modeling.py index 357ca94c07..8a816609fa 100644 --- a/optimum/intel/openvino/modeling.py +++ b/optimum/intel/openvino/modeling.py @@ -137,7 +137,7 @@ def to(self, device: str): self._device = device.upper() self.request = None else: - logger.warning(f"device must be of type {str} but got {type(device)} instead") + logger.debug(f"device must be of type {str} but got {type(device)} instead") return self diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 58d1ce6c73..a6b8aacf43 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -64,7 +64,10 @@ def __init__( self.model_save_dir = model_save_dir self._device = device.upper() self.is_dynamic = dynamic_shapes - self.ov_config = ov_config if ov_config is not None else {"PERFORMANCE_HINT": "LATENCY"} + self.ov_config = ov_config if ov_config is not None else {} + if self.ov_config.get("PERFORMANCE_HINT") is None: + self.ov_config["PERFORMANCE_HINT"] = "LATENCY" + self.preprocessors = kwargs.get("preprocessors", []) enable_compilation = kwargs.get("compile", True) diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index 28e112c4d9..0daf9dfdd3 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -67,6 +67,10 @@ def __init__( self._device = device.upper() self.is_dynamic = dynamic_shapes self.ov_config = ov_config if ov_config is not None else {} + + if self.ov_config.get("PERFORMANCE_HINT") is None: + self.ov_config["PERFORMANCE_HINT"] = "LATENCY" + self.preprocessors = kwargs.get("preprocessors", []) if self.is_dynamic: diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index f0fea5a8ce..eb407b4cd1 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -101,6 +101,8 @@ def __init__( self._device = device.upper() self.is_dynamic = dynamic_shapes self.ov_config = ov_config if ov_config is not None else {} + if self.ov_config.get("PERFORMANCE_HINT") is None: + self.ov_config["PERFORMANCE_HINT"] = "LATENCY" # This attribute is needed to keep one reference on the temporary directory, since garbage collecting # would end-up removing the directory containing the underlying OpenVINO model @@ -456,7 +458,7 @@ def to(self, device: str): self._device = device.upper() self.clear_requests() else: - logger.warning(f"device must be of type {str} but got {type(device)} instead") + logger.debug(f"device must be of type {str} but got {type(device)} instead") return self diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py index d68cbc75ed..4f948cdc5f 100644 --- a/optimum/intel/openvino/modeling_seq2seq.py +++ b/optimum/intel/openvino/modeling_seq2seq.py @@ -285,7 +285,7 @@ def to(self, device: str): self.decoder_with_past._device = self._device self.clear_requests() else: - logger.warning(f"device must be of type {str} but got {type(device)} instead") + logger.debug(f"device must be of type {str} but got {type(device)} instead") return self diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index d1da08f58e..af903f2226 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -116,6 +116,9 @@ def test_load_from_hub_and_save_model(self): tokens = tokenizer("This is a sample input", return_tensors="pt") loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID) self.assertIsInstance(loaded_model.config, PretrainedConfig) + # Test that PERFORMANCE_HINT is set to LATENCY by default + self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY") + self.assertEqual(loaded_model.request.get_property("PERFORMANCE_HINT"), "LATENCY") loaded_model_outputs = loaded_model(**tokens) # Test specifying ov_config with throughput hint and manual cache dir @@ -134,7 +137,10 @@ def test_load_from_hub_and_save_model(self): folder_contents = os.listdir(tmpdirname) self.assertTrue(OV_XML_FILE_NAME in folder_contents) self.assertTrue(OV_XML_FILE_NAME.replace(".xml", ".bin") in folder_contents) - model = OVModelForSequenceClassification.from_pretrained(tmpdirname) + model = OVModelForSequenceClassification.from_pretrained(tmpdirname, ov_config={"NUM_STREAMS": 2}) + # Test that PERFORMANCE_HINT is set to LATENCY by default even with ov_config provided + self.assertEqual(model.ov_config.get("PERFORMANCE_HINT"), "LATENCY") + self.assertEqual(model.request.get_property("PERFORMANCE_HINT"), "LATENCY") outputs = model(**tokens) self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits)) @@ -150,6 +156,9 @@ def test_load_from_hub_and_save_decoder_model(self, use_cache): tokens = tokenizer("This is a sample input", return_tensors="pt") loaded_model = OVModelForCausalLM.from_pretrained(model_id, use_cache=use_cache) self.assertIsInstance(loaded_model.config, PretrainedConfig) + # Test that PERFORMANCE_HINT is set to LATENCY by default + self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY") + self.assertEqual(loaded_model.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY") loaded_model_outputs = loaded_model(**tokens) with tempfile.TemporaryDirectory() as tmpdirname: @@ -172,6 +181,11 @@ def test_load_from_hub_and_save_seq2seq_model(self): loaded_model = OVModelForSeq2SeqLM.from_pretrained(self.OV_SEQ2SEQ_MODEL_ID, compile=False) self.assertIsInstance(loaded_model.config, PretrainedConfig) loaded_model.to("cpu") + loaded_model.compile() + # Test that PERFORMANCE_HINT is set to LATENCY by default + self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY") + self.assertEqual(loaded_model.decoder.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY") + loaded_model_outputs = loaded_model.generate(**tokens) with tempfile.TemporaryDirectory() as tmpdirname: @@ -192,6 +206,10 @@ def test_load_from_hub_and_save_seq2seq_model(self): def test_load_from_hub_and_save_stable_diffusion_model(self): loaded_pipeline = OVStableDiffusionPipeline.from_pretrained(self.OV_DIFFUSION_MODEL_ID, compile=False) self.assertIsInstance(loaded_pipeline.config, Dict) + # Test that PERFORMANCE_HINT is set to LATENCY by default + self.assertEqual(loaded_pipeline.ov_config.get("PERFORMANCE_HINT"), "LATENCY") + loaded_pipeline.compile() + self.assertEqual(loaded_pipeline.unet.request.get_property("PERFORMANCE_HINT"), "LATENCY") batch_size, height, width = 2, 16, 16 np.random.seed(0) inputs = {