Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Co-authored-by: Ella Charlaix <[email protected]>
  • Loading branch information
eaidova and echarlaix committed Jan 15, 2024
1 parent 2fb9032 commit b5e87b2
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 35 deletions.
2 changes: 1 addition & 1 deletion optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
),
)
optional_group.add_argument(
"--no-stateful",
"--disable-stateful",
action="store_true",
help=(
"Disable stateful converted models, stateless models will be generated instead. Stateful models are produced by default when this key is not used. "
Expand Down
7 changes: 2 additions & 5 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def main_export(
fn_get_submodels: Optional[Callable] = None,
compression_option: Optional[str] = None,
compression_ratio: Optional[float] = None,
stateful: Optional[bool] = True,
stateful: bool = True,
**kwargs_shapes,
):
"""
Expand Down Expand Up @@ -126,7 +126,7 @@ def main_export(
`int4_sym_g64` - INT4 symmetric weights w/ group size 64, "int4_asym_g64" - as previous but asymmetric w/ zero-point, `f32` - means no compression.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
stateful (`Optional[bool]`, defaults to `True`):
stateful (`bool`, defaults to `True`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
**kwargs_shapes (`Dict`):
Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.
Expand Down Expand Up @@ -281,9 +281,6 @@ class StoreAttr(object):
possible_synonyms = ""
logger.info(f"Automatic task detection to {task}{possible_synonyms}.")

synonyms_for_task = TasksManager.synonyms_for_task(task)
synonyms_for_task.add(task)

task_support_stateful = ensure_export_task_support_stateful(task)
if stateful and not task_support_stateful:
stateful = False
Expand Down
21 changes: 2 additions & 19 deletions optimum/exporters/openvino/stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,6 @@ def model_has_input_output_name(ov_model: ov.Model, name: str):
return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], [])


def model_has_input(ov_model: ov.Model, name: str):
"""
Helper function for checking that model has specified input name
Parameters:
ov_model (ov.Model):
openvino model
name (str):
name of input
Returns:
True if input with requested name exists else False
"""
return name in sum([list(t.get_names()) for t in ov_model.inputs], [])


def fuse_cache_reorder(
ov_model: ov.Model, not_kv_inputs: List[str], key_value_input_names: List[str], gather_dim: int
):
Expand Down Expand Up @@ -200,9 +184,8 @@ def ensure_stateful_is_available(warn=True):


def ensure_export_task_support_stateful(task: str):
synonyms_for_task = TasksManager.synonyms_for_task(task)
synonyms_for_task.add(task)
return "text-generation-with-past" in synonyms_for_task
task = TasksManager.map_from_synonym(task)
return task == "text-generation-with-past"


def patch_stateful(config: PretrainedConfig, ov_model: ov.Model):
Expand Down
5 changes: 1 addition & 4 deletions tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,10 +497,7 @@ def test_compare_to_transformers(self, model_arch):
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True)
self.assertIsInstance(ov_model.config, PretrainedConfig)
self.assertTrue(ov_model.use_cache)
if self.IS_SUPPORT_STATEFUL and model_arch != "gpt_bigcode":
self.assertTrue(ov_model.stateful)
else:
self.assertFalse(ov_model.stateful)
self.assertEqual(ov_model.stateful, self.IS_SUPPORT_STATEFUL and model_arch != "gpt_bigcode")
transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokens = tokenizer(
Expand Down
9 changes: 3 additions & 6 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,12 +300,9 @@ def test_ovmodel_stateful_load_with_compressed_weights(self, model_cls, model_ty
self.assertTrue(model.stateful)
self.assertTrue(model.use_cache)

models = [model]

expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
for i, model in enumerate(models):
_, num_int8, _ = get_num_quantized_nodes(model)
self.assertEqual(expected_ov_int8[i], num_int8)
expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type][0]
_, num_int8, _ = get_num_quantized_nodes(model)
self.assertEqual(expected_ov_int8, num_int8)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type):
Expand Down

0 comments on commit b5e87b2

Please sign in to comment.