diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index a9cd8e309b..a94496c3bd 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -154,7 +154,8 @@ def update_pkv_precision(self, force_fp32=False):
         pkv_precision = Type.f32
         if not force_fp32:
             device = self._device.upper()
-            pkv_precision = core.get_property(device, "INFERENCE_PRECISION_HINT")
+            if "INFERENCE_PRECISION_HINT" in core.get_property(device, "SUPPORTED_PROPERTIES"):
+                pkv_precision = core.get_property(device, "INFERENCE_PRECISION_HINT")
         # ov_config["INFERENCE_PRECISION_HINT"] may override the prefer precision
         if self.ov_config:
             inference_precision_hint = self.ov_config.get("INFERENCE_PRECISION_HINT", "")
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index a4bf9b38e0..1de402bc22 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -558,6 +558,14 @@ def test_compare_with_and_without_past_key_values(self):
         del model_without_pkv
         gc.collect()
 
+    def test_auto_device_loading(self):
+        model_id = MODEL_NAMES["gpt2"]
+        model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, device="AUTO")
+        model.half()
+        self.assertEqual(model._device, "AUTO")
+        del model
+        gc.collect()
+
 
 class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (