Skip to content

Commit

Permalink
Fix for AUTO:device; add device to compile info message
Browse files Browse the repository at this point in the history
Also remove _SUPPORTED_DEVICES since it is no longer used
  • Loading branch information
helena-intel committed Sep 22, 2023
1 parent 1db2651 commit f418ae2
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 27 deletions.
18 changes: 1 addition & 17 deletions optimum/intel/openvino/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,6 @@

logger = logging.getLogger(__name__)

# Device strings checked by _ensure_supported_device: single devices plus the
# AUTO/MULTI meta-device combinations of CPU and GPU.
# NOTE(review): this whitelist does not cover forms like "AUTO:GPU" or
# device-index variants (e.g. "GPU.1") — presumably why it was removed.
_SUPPORTED_DEVICES = {
    "CPU",
    "GPU",
    "AUTO",
    "AUTO:CPU,GPU",
    "AUTO:GPU,CPU",
    "MULTI",
    "MULTI:CPU,GPU",
    "MULTI:GPU,CPU",
}


# workaround to enable compatibility between openvino models and transformers pipelines
class PreTrainedModel(OptimizedModel):
Expand Down Expand Up @@ -325,7 +314,7 @@ def _to_load(

def compile(self):
if self.request is None:
logger.info("Compiling the model...")
logger.info(f"Compiling the model to {self._device} ...")
ov_config = {**self.ov_config}
if "CACHE_DIR" not in self.ov_config.keys():
# Set default CACHE_DIR only if it is not set.
Expand Down Expand Up @@ -382,11 +371,6 @@ def half(self):
self.request = None
return self

def _ensure_supported_device(self, device: str = None):
    """Validate a device string against the module-level whitelist.

    Falls back to ``self._device`` when *device* is None; raises
    ValueError for any device not listed in ``_SUPPORTED_DEVICES``.
    """
    device = self._device if device is None else device
    if device not in _SUPPORTED_DEVICES:
        raise ValueError(f"Unknown device: {device}. Expected one of {_SUPPORTED_DEVICES}.")

def forward(self, *args, **kwargs):
    """Abstract inference entry point; subclasses must override."""
    raise NotImplementedError

Expand Down
8 changes: 6 additions & 2 deletions optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,12 @@ def update_pkv_precision(self, force_fp32=False):
pkv_precision = Type.f32
if not force_fp32:
device = self._device.upper()
if "INFERENCE_PRECISION_HINT" in core.get_property(device, "SUPPORTED_PROPERTIES"):
pkv_precision = core.get_property(device, "INFERENCE_PRECISION_HINT")
try:
if "INFERENCE_PRECISION_HINT" in core.get_property(device, "SUPPORTED_PROPERTIES"):
pkv_precision = core.get_property(device, "INFERENCE_PRECISION_HINT")
except RuntimeError: # use default precision when get_property fails, e.g. when device is "AUTO:GPU"
pass

# ov_config["INFERENCE_PRECISION_HINT"] may override the prefer precision
if self.ov_config:
inference_precision_hint = self.ov_config.get("INFERENCE_PRECISION_HINT", "")
Expand Down
2 changes: 1 addition & 1 deletion optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ def __init__(

def _compile(self):
    """Compile the wrapped OV model lazily; no-op when already compiled.

    The paste contained both the pre- and post-change log lines from the
    diff; this is the post-change version, which names the target device.
    """
    if self.request is None:
        # Include the device so users can see where the model is compiled
        # (relevant when device is e.g. "AUTO" or "AUTO:GPU").
        logger.info(f"Compiling the {self._model_name} to {self.device} ...")
        self.request = core.compile_model(self.model, self.device, self.ov_config)

@property
Expand Down
4 changes: 2 additions & 2 deletions optimum/intel/openvino/modeling_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def __call__(self, *args, **kwargs):

def _compile(self):
    """Compile the encoder OV model lazily; no-op when already compiled.

    Reconstructed post-change version of the diff hunk (the paste showed
    both the old and new log lines).
    """
    if self.request is None:
        # Log the target device for visibility, matching the other _compile methods.
        logger.info(f"Compiling the encoder to {self._device} ...")
        self.request = core.compile_model(self.model, self._device, self.ov_config)


Expand Down Expand Up @@ -442,5 +442,5 @@ def __call__(self, *args, **kwargs):

def _compile(self):
    """Compile the decoder OV model lazily and create its infer request.

    Reconstructed post-change version of the diff hunk (the paste showed
    both the old and new log lines). Unlike the encoder, the decoder keeps
    an infer request rather than the compiled model itself.
    """
    if self.request is None:
        # Log the target device for visibility, matching the other _compile methods.
        logger.info(f"Compiling the decoder to {self._device} ...")
        self.request = core.compile_model(self.model, self._device, self.ov_config).create_infer_request()
11 changes: 6 additions & 5 deletions tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,11 +560,12 @@ def test_compare_with_and_without_past_key_values(self):

def test_auto_device_loading(self):
    """Check that models load with AUTO-style devices and keep the device string.

    Reconstructed post-change version of the diff hunk: the paste contained
    both the old single-device body and the new loop. The loop also covers
    "AUTO:CPU", the form the commit's fix targets.
    """
    model_id = MODEL_NAMES["gpt2"]
    for device in ("AUTO", "AUTO:CPU"):
        model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, device=device)
        model.half()
        self.assertEqual(model._device, device)
        del model
        gc.collect()


class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
Expand Down

0 comments on commit f418ae2

Please sign in to comment.