Fix general code style and apply renaming suggestions
eaidova committed Dec 21, 2023
1 parent aa299e6 commit 6b0236b
Showing 5 changed files with 73 additions and 65 deletions.
optimum/commands/export/openvino.py (4 changes: 2 additions & 2 deletions)
@@ -95,7 +95,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
     optional_group.add_argument(
         "--stateful",
         action="store_true",
-        help="Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs"
+        help="Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs",
     )


@@ -143,7 +143,7 @@ def run(self):
             trust_remote_code=self.args.trust_remote_code,
             pad_token_id=self.args.pad_token_id,
             compression_option=self.args.weight_format,
-            compression_ratio=self.args.ratio
+            compression_ratio=self.args.ratio,
             stateful=self.args.stateful,
             # **input_shapes,
         )
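
For context (not part of the commit), a minimal sketch of the equivalent programmatic call; the first two parameter names and the model id/output path are assumptions:

from optimum.exporters.openvino import main_export

# Hypothetical equivalent of the CLI path above; the remaining keywords
# mirror exactly what run() forwards in the hunk.
main_export(
    model_name_or_path="gpt2",  # assumed parameter name, placeholder model id
    output="ov_model",          # assumed parameter name, placeholder directory
    compression_option="int8",  # maps to --weight-format
    compression_ratio=1.0,      # maps to --ratio
    stateful=True,              # maps to the new --stateful flag
)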
optimum/exporters/openvino/better_transformer_patch.py (15 changes: 9 additions & 6 deletions)
@@ -13,26 +13,29 @@
 # limitations under the License.


-import torch
 import types

+import torch
+

 def patch_model_with_bettertransformer(model, model_config):
     try:
         model = model.to_bettertransformer()
     except Exception as e:
-        print(f'[ WARNING ] Cannot apply model.to_bettertransformer because of the exception:\n{e}')
+        print(f"[ WARNING ] Cannot apply model.to_bettertransformer because of the exception:\n{e}")
         return model

     # for better transformers we need sequence lenght to be not 1 to make a correct trace
     # patch generate_dummy_inputs in the config

     def pathed_generate_dummy_inputs(self, *args, **kwargs):
         dummy_inputs = self._original_generate_dummy_inputs(*args, **kwargs)
-        if 'input_ids' in dummy_inputs and dummy_inputs['input_ids'].shape[1] == 1:
-            dummy_inputs['input_ids'] = torch.cat([dummy_inputs['input_ids'], dummy_inputs['input_ids']], dim=-1)
-            attention_mask = dummy_inputs['attention_mask']
-            dummy_inputs['attention_mask'] = torch.cat([attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1)
+        if "input_ids" in dummy_inputs and dummy_inputs["input_ids"].shape[1] == 1:
+            dummy_inputs["input_ids"] = torch.cat([dummy_inputs["input_ids"], dummy_inputs["input_ids"]], dim=-1)
+            attention_mask = dummy_inputs["attention_mask"]
+            dummy_inputs["attention_mask"] = torch.cat(
+                [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
+            )
         return dummy_inputs

     model_config._original_generate_dummy_inputs = model_config.generate_dummy_inputs
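
The patched dummy-input logic above can be exercised standalone; a minimal sketch with illustrative tensors:

import torch

# Widen a length-1 dummy input so BetterTransformer tracing sees seq_len > 1,
# mirroring pathed_generate_dummy_inputs above (tensor values are illustrative).
dummy_inputs = {
    "input_ids": torch.tensor([[42]]),  # shape (1, 1)
    "attention_mask": torch.ones((1, 1), dtype=torch.long),
}
if "input_ids" in dummy_inputs and dummy_inputs["input_ids"].shape[1] == 1:
    dummy_inputs["input_ids"] = torch.cat([dummy_inputs["input_ids"], dummy_inputs["input_ids"]], dim=-1)
    attention_mask = dummy_inputs["attention_mask"]
    dummy_inputs["attention_mask"] = torch.cat(
        [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
    )
print(dummy_inputs["input_ids"].shape)  # torch.Size([1, 2])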
optimum/exporters/openvino/convert.py (13 changes: 7 additions & 6 deletions)
@@ -30,10 +30,10 @@
 from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
 from optimum.exporters.onnx.model_patcher import DecoderModelPatcher
 from optimum.utils import is_diffusers_available
-from .stateful import patch_stateful, raise_if_openvino_is_too_old
-from .better_transformer_patch import patch_model_with_bettertransformer

 from ...intel.utils.import_utils import is_nncf_available, is_optimum_version
+from .better_transformer_patch import patch_model_with_bettertransformer
+from .stateful import patch_stateful, raise_if_openvino_is_too_old
 from .utils import (
     OV_XML_FILE_NAME,
     clear_class_registry,
@@ -396,8 +396,9 @@ def ts_patched_forward(*args, **kwargs):
             model.forward = orig_forward
         if stateful:
             raise ValueError(
-                'Making stateful models is not supported when exporting to ONNX as an intermediate step. '
-                'Set stateful=False, or provide a model that can be converted to OpenVINO without fallback to ONNX conversion path.')
+                "Making stateful models is not supported when exporting to ONNX as an intermediate step. "
+                "Set stateful=False, or provide a model that can be converted to OpenVINO without fallback to ONNX conversion path."
+            )
         return export_pytorch_via_onnx(
             model,
             config,
@@ -432,8 +433,8 @@ def ts_patched_forward(*args, **kwargs):

     if stateful:
         # Patching model according to stateful parameters
-        model.key_value_input_names = [name for name in input_names if name.startswith('past_key_values.')]
-        model.key_value_output_names = [name for name in output_names if name.startswith('present.')]
+        model.key_value_input_names = [name for name in input_names if name.startswith("past_key_values.")]
+        model.key_value_output_names = [name for name in output_names if name.startswith("present.")]
         patch_stateful(model, ov_model)

     _save_model(ov_model, output, compression_option=compression_option, compression_ratio=compression_ratio)
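
The name-based kv-cache detection above reduces to simple prefix filtering; a small sketch with hypothetical tensor names:

# Hypothetical input/output names of an exported decoder, filtered the same
# way as in the hunk above.
input_names = ["input_ids", "attention_mask", "past_key_values.0.key", "past_key_values.0.value"]
output_names = ["logits", "present.0.key", "present.0.value"]

key_value_input_names = [name for name in input_names if name.startswith("past_key_values.")]
key_value_output_names = [name for name in output_names if name.startswith("present.")]
print(key_value_input_names)   # ['past_key_values.0.key', 'past_key_values.0.value']
print(key_value_output_names)  # ['present.0.key', 'present.0.value']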
optimum/exporters/openvino/stateful.py (92 changes: 49 additions & 43 deletions)
@@ -14,23 +14,23 @@


 import numpy as np
-from packaging import version

 import openvino as ov
 from openvino.runtime import opset13
 from optimum.intel.utils.import_utils import is_openvino_version
+from optimum.utils.normalized_config import NormalizedConfigManager


-def model_has_name(ov_model: ov.Model, name: str):
-    return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], list())
+def model_has_input_output_name(ov_model: ov.Model, name: str):
+    return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], [])


 def model_has_input(ov_model: ov.Model, name: str):
-    return name in sum([list(t.get_names()) for t in ov_model.inputs], list())
+    return name in sum([list(t.get_names()) for t in ov_model.inputs], [])


 def model_has_cache_reorder(ov_model):
-    return model_has_input(ov_model, 'beam_idx')
+    return model_has_input(ov_model, "beam_idx")


 def model_has_state(ov_model):
@@ -39,18 +39,18 @@ def model_has_state(ov_model):


 def fuse_cache_reorder(ov_model: ov.Model, not_kv_inputs, key_value_input_names, gather_dim: int):
-    """ Adds a new beam_idx parameter and Gather op per each kv-cache input in a given model.
-    Should be run before make_stateful. Implements optimumum's _reorder_cache
-    inside the model in the beginning of each iteration.
-    Gather works along given gather_dim dimension that may vary from model to model.
-    KV-cache inputs are identified based on names in key_value_input_names.
-    Append the new beam_idx parameter to not_kv_inputs.
+    """Adds a new beam_idx parameter and Gather op per each kv-cache input in a given model.
+    Should be run before make_stateful. Implements optimumum's _reorder_cache
+    inside the model in the beginning of each iteration.
+    Gather works along given gather_dim dimension that may vary from model to model.
+    KV-cache inputs are identified based on names in key_value_input_names.
+    Append the new beam_idx parameter to not_kv_inputs.
     """

-    assert not model_has_name(ov_model, 'beam_idx')
-    input_batch = ov_model.input('input_ids').get_partial_shape()[0]
-    beam_idx = opset13.parameter(name='beam_idx', dtype=ov.Type.i32, shape=ov.PartialShape([input_batch]))
-    beam_idx.output(0).get_tensor().add_names({'beam_idx'})  # why list is not accepted?
+    assert not model_has_input_output_name(ov_model, "beam_idx")
+    input_batch = ov_model.input("input_ids").get_partial_shape()[0]
+    beam_idx = opset13.parameter(name="beam_idx", dtype=ov.Type.i32, shape=ov.PartialShape([input_batch]))
+    beam_idx.output(0).get_tensor().add_names({"beam_idx"})  # why list is not accepted?
     ov_model.add_parameters([beam_idx])
     not_kv_inputs.append(ov_model.inputs[-1])
     # Go over all cache parameters and fuse _reorder_cache with indices provided by the new parameter beam_idx
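
Conceptually, the fused Gather performs the reordering that optimum's _reorder_cache otherwise does outside the model; a numpy sketch with illustrative shapes:

import numpy as np

# What the fused Gather does each step: reorder the kv-cache along the
# batch/beam dimension with beam_idx (shapes and values are illustrative).
past_key = np.arange(3 * 2 * 4, dtype=np.float32).reshape(3, 2, 4)
beam_idx = np.array([2, 0, 1], dtype=np.int32)  # reordering chosen by beam search
reordered = np.take(past_key, beam_idx, axis=0)
assert (reordered[0] == past_key[2]).all()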
@@ -65,29 +65,29 @@ def fuse_cache_reorder(ov_model: ov.Model, not_kv_inputs, key_value_input_names, gather_dim: int):

 def build_state_initializer(ov_model: ov.Model, batch_dim):
     """Build initialization ShapeOf Expression for all ReadValue ops"""
-    input_ids = ov_model.input('input_ids')
-    batch = opset13.gather(opset13.shape_of(input_ids, output_type='i64'), opset13.constant([0]), opset13.constant(0))
+    input_ids = ov_model.input("input_ids")
+    batch = opset13.gather(opset13.shape_of(input_ids, output_type="i64"), opset13.constant([0]), opset13.constant(0))
     for op in ov_model.get_ops():
-        if op.get_type_name() == 'ReadValue':
+        if op.get_type_name() == "ReadValue":
             dims = [dim.min_length for dim in list(op.get_output_partial_shape(0))]
             dims[batch_dim] = batch
-            dims = [opset13.constant(np.array([dim], dtype=np.int64)) if type(dim) is int else dim for dim in dims]
+            dims = [opset13.constant(np.array([dim], dtype=np.int64)) if isinstance(dim, int) else dim for dim in dims]
             shape = opset13.concat(dims, axis=0)
             broadcast = opset13.broadcast(opset13.constant(0.0, dtype=op.get_output_element_type(0)), shape)
             op.set_arguments([broadcast])
     ov_model.validate_nodes_and_infer_types()


 def make_stateful(
-        ov_model: ov.Model,
-        not_kv_inputs,
-        key_value_input_names,
-        key_value_output_names,
-        batch_dim,
-        num_attention_heads,
-        num_beams_and_batch=None):
-    """ Hides kv-cache inputs and outputs inside the model as variables.
-    """
+    ov_model: ov.Model,
+    not_kv_inputs,
+    key_value_input_names,
+    key_value_output_names,
+    batch_dim,
+    num_attention_heads,
+    num_beams_and_batch=None,
+):
+    """Hides kv-cache inputs and outputs inside the model as variables."""
     from openvino._offline_transformations import apply_make_stateful_transformation

     input_output_map = {}
@@ -101,7 +101,7 @@ def make_stateful(
                 shape[0] = num_beams_and_batch
                 input.get_node().set_partial_shape(shape)
             else:
-                print(f'[ WARNING ] Rank of {input.get_any_name()} input of the model is not 2, batch size is not set')
+                print(f"[ WARNING ] Rank of {input.get_any_name()} input of the model is not 2, batch size is not set")

     for kv_name_pair in zip(key_value_input_names, key_value_output_names):
         input_output_map[kv_name_pair[0]] = kv_name_pair[1]
@@ -122,27 +122,33 @@

 def raise_if_openvino_is_too_old():
     if is_openvino_version("<=", "2023.2"):
-        raise ValueError(f'Could not create or use stateful model when using old version of openvino=={ov.__version__}. Install openvino>=2023.3.0.')
+        raise ValueError(
+            f"Could not create or use stateful model when using old version of openvino=={ov.__version__}. Install openvino>=2023.3.0."
+        )


-def patch_stateful(model, ov_model):
+def patch_stateful(config, ov_model):
     raise_if_openvino_is_too_old()
-    not_kv_inputs = [input for input in ov_model.inputs if not any(name in model.key_value_input_names for name in input.get_names())]
+
+    key_value_input_names = [
+        key.get_any_name() for key in ov_model.inputs if any("key_values" in key_name for key_name in key.names)
+    ]
+    key_value_output_names = [
+        key.get_any_name() for key in ov_model.output if any("present" in key_name for key_name in key.names)
+    ]
+    not_kv_inputs = [
+        input for input in ov_model.inputs if not any(name in key_value_input_names for name in input.get_names())
+    ]

     # By default, batch is the 0-th but chatglm uses 1-st dimension as batch
     # TODO: Deduce from a model via ordinal reshape (?) and topology
-    batch_dim = 1 if model.config.model_type == 'chatglm' else 0
+    batch_dim = 1 if config.model_type == "chatglm" else 0

-    fuse_cache_reorder(ov_model, not_kv_inputs, model.key_value_input_names, batch_dim)
+    fuse_cache_reorder(ov_model, not_kv_inputs, key_value_input_names, batch_dim)

-    normalized_config = NormalizedConfigManager.get_normalized_config_class(model.config.model_type)(model.config)
-    num_attention_heads = normalized_config.num_attention_heads if model.config.model_type == 'bloom' else 1
+    normalized_config = NormalizedConfigManager.get_normalized_config_class(config.model_type)(config)
+    num_attention_heads = normalized_config.num_attention_heads if config.model_type == "bloom" else 1

     make_stateful(
-        ov_model,
-        not_kv_inputs,
-        model.key_value_input_names,
-        model.key_value_output_names,
-        batch_dim,
-        num_attention_heads,
-        None)
+        ov_model, not_kv_inputs, key_value_input_names, key_value_output_names, batch_dim, num_attention_heads, None
+    )
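
A usage sketch of the renamed entry point (not from the commit); the model path and id are placeholders:

import openvino as ov
from transformers import AutoConfig
from optimum.exporters.openvino import patch_stateful

core = ov.Core()
ov_model = core.read_model("ov_model/openvino_model.xml")  # hypothetical path to an exported decoder
config = AutoConfig.from_pretrained("gpt2")                # hypothetical model id
patch_stateful(config, ov_model)  # past_key_values.*/present.* become internal state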
optimum/intel/openvino/modeling_decoder.py (14 changes: 6 additions & 8 deletions)
@@ -29,12 +29,11 @@

 from optimum.utils import NormalizedConfigManager

-from ...exporters.openvino import main_export
+from ...exporters.openvino import main_export, patch_stateful, raise_if_openvino_is_too_old
 from ..utils.import_utils import is_transformers_version
 from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
 from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
 from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE
-from ...exporters.openvino import patch_stateful, raise_if_openvino_is_too_old


 if is_transformers_version("<", "4.25.0"):
@@ -164,8 +163,7 @@ def raise_error(model_prop, user_prop, name):
             self.compile()

         if use_cache ^ self.use_cache:
-            raise_error(self.use_cache, use_cache, 'use_cache')
-
+            raise_error(self.use_cache, use_cache, "use_cache")

     def update_pkv_precision(self, force_fp32=False):
         if not self.use_cache or self.stateful:
@@ -310,7 +308,7 @@ def compile(self):
             self.request = self.request.create_infer_request()

     def _make_stateful(self):
-        patch_stateful(self, self.model)
+        patch_stateful(self.config, self.model)
         self.stateful = True


@@ -379,7 +377,7 @@ def forward(
             for input_name in self.key_value_input_names:
                 model_inputs = self.model.input(input_name)
                 shape = model_inputs.get_partial_shape()
-                if self.config.model_type == 'chatglm':
+                if self.config.model_type == "chatglm":
                     shape[0] = 0
                     shape[1] = batch_size
                 else:
@@ -427,8 +425,8 @@ def forward(

inputs["position_ids"] = position_ids

if hasattr(self, 'next_beam_idx'):
inputs['beam_idx'] = self.next_beam_idx
if hasattr(self, "next_beam_idx"):
inputs["beam_idx"] = self.next_beam_idx

# Run inference
self.request.start_async(inputs, share_inputs=True)
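
For reference, the stateful inference pattern used above, sketched outside the class; the path, tensor names, and shapes are illustrative:

import numpy as np
import openvino as ov

# With a stateful model the kv-cache lives in the infer request's internal
# state, so only regular inputs (plus beam_idx) are fed at each step.
core = ov.Core()
compiled = core.compile_model("ov_model/openvino_model.xml", "CPU")  # hypothetical path
request = compiled.create_infer_request()

inputs = {
    "input_ids": np.array([[101]], dtype=np.int64),
    "attention_mask": np.array([[1]], dtype=np.int64),
    "beam_idx": np.array([0], dtype=np.int32),
}
request.start_async(inputs, share_inputs=True)
request.wait()
logits = request.get_tensor("logits").data  # output name as exported by optimum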
