From b5e87b2a2fd9b8add32bdd22bb04aee8247242c7 Mon Sep 17 00:00:00 2001
From: Ekaterina Aidova
Date: Mon, 15 Jan 2024 15:54:04 +0400
Subject: [PATCH] Apply suggestions from code review

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/commands/export/openvino.py    |  2 +-
 optimum/exporters/openvino/__main__.py |  7 ++-----
 optimum/exporters/openvino/stateful.py | 21 ++-------------------
 tests/openvino/test_modeling.py        |  5 +----
 tests/openvino/test_quantization.py    |  9 +++------
 5 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 8791e8751d..7f620033ec 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -93,7 +93,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
         ),
     )
     optional_group.add_argument(
-        "--no-stateful",
+        "--disable-stateful",
         action="store_true",
         help=(
             "Disable stateful converted models, stateless models will be generated instead. Stateful models are produced by default when this key is not used. "
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index fcb9ab9c35..b263177329 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -66,7 +66,7 @@ def main_export(
     fn_get_submodels: Optional[Callable] = None,
     compression_option: Optional[str] = None,
     compression_ratio: Optional[float] = None,
-    stateful: Optional[bool] = True,
+    stateful: bool = True,
     **kwargs_shapes,
 ):
     """
@@ -126,7 +126,7 @@ def main_export(
             `int4_sym_g64` - INT4 symmetric weights w/ group size 64, "int4_asym_g64" - as previous but asymmetric w/ zero-point, `f32` - means no compression.
         compression_ratio (`Optional[float]`, defaults to `None`):
             Compression ratio between primary and backup precision (only relevant to INT4).
-        stateful (`Optional[bool]`, defaults to `True`):
+        stateful (`bool`, defaults to `True`):
             Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
         **kwargs_shapes (`Dict`):
             Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.
@@ -281,9 +281,6 @@ class StoreAttr(object):
             possible_synonyms = ""
         logger.info(f"Automatic task detection to {task}{possible_synonyms}.")
 
-    synonyms_for_task = TasksManager.synonyms_for_task(task)
-    synonyms_for_task.add(task)
-
     task_support_stateful = ensure_export_task_support_stateful(task)
     if stateful and not task_support_stateful:
         stateful = False
diff --git a/optimum/exporters/openvino/stateful.py b/optimum/exporters/openvino/stateful.py
index 3d5579c04a..e6ec1879a5 100644
--- a/optimum/exporters/openvino/stateful.py
+++ b/optimum/exporters/openvino/stateful.py
@@ -45,22 +45,6 @@ def model_has_input_output_name(ov_model: ov.Model, name: str):
     return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], [])
 
 
-def model_has_input(ov_model: ov.Model, name: str):
-    """
-    Helper function for checking that model has specified input name
-
-    Parameters:
-      ov_model (ov.Model):
-          opennvino model
-      name (str):
-          name of input
-
-    Returns:
-      True if input with requested name exists else False
-    """
-    return name in sum([list(t.get_names()) for t in ov_model.inputs], [])
-
-
 def fuse_cache_reorder(
     ov_model: ov.Model, not_kv_inputs: List[str], key_value_input_names: List[str], gather_dim: int
 ):
@@ -200,9 +184,8 @@ def ensure_stateful_is_available(warn=True):
 
 
 def ensure_export_task_support_stateful(task: str):
-    synonyms_for_task = TasksManager.synonyms_for_task(task)
-    synonyms_for_task.add(task)
-    return "text-generation-with-past" in synonyms_for_task
+    task = TasksManager.map_from_synonym(task)
+    return task == "text-generation-with-past"
 
 
 def patch_stateful(config: PretrainedConfig, ov_model: ov.Model):
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 4d4e36ecb9..334329cdd2 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -497,10 +497,7 @@ def test_compare_to_transformers(self, model_arch):
         ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True)
         self.assertIsInstance(ov_model.config, PretrainedConfig)
         self.assertTrue(ov_model.use_cache)
-        if self.IS_SUPPORT_STATEFUL and model_arch != "gpt_bigcode":
-            self.assertTrue(ov_model.stateful)
-        else:
-            self.assertFalse(ov_model.stateful)
+        self.assertEqual(ov_model.stateful, self.IS_SUPPORT_STATEFUL and model_arch != "gpt_bigcode")
         transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokens = tokenizer(
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 1646582072..d6da6a78ba 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -300,12 +300,9 @@ def test_ovmodel_stateful_load_with_compressed_weights(self, model_cls, model_ty
         self.assertTrue(model.stateful)
         self.assertTrue(model.use_cache)
 
-        models = [model]
-
-        expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
-        for i, model in enumerate(models):
-            _, num_int8, _ = get_num_quantized_nodes(model)
-            self.assertEqual(expected_ov_int8[i], num_int8)
+        expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type][0]
+        _, num_int8, _ = get_num_quantized_nodes(model)
+        self.assertEqual(expected_ov_int8, num_int8)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
     def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type):