Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Co-authored-by: Ella Charlaix <[email protected]>
  • Loading branch information
eaidova and echarlaix committed Jan 15, 2024
1 parent 2fb9032 commit b5e87b2
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 35 deletions.
2 changes: 1 addition & 1 deletion optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
),
)
optional_group.add_argument(
"--no-stateful",
"--disable-stateful",
action="store_true",
help=(
"Disable stateful converted models, stateless models will be generated instead. Stateful models are produced by default when this key is not used. "
Expand Down
7 changes: 2 additions & 5 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def main_export(
fn_get_submodels: Optional[Callable] = None,
compression_option: Optional[str] = None,
compression_ratio: Optional[float] = None,
stateful: Optional[bool] = True,
stateful: bool = True,
**kwargs_shapes,
):
"""
Expand Down Expand Up @@ -126,7 +126,7 @@ def main_export(
`int4_sym_g64` - INT4 symmetric weights w/ group size 64, "int4_asym_g64" - as previous but asymmetric w/ zero-point, `f32` - means no compression.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
stateful (`Optional[bool]`, defaults to `True`):
stateful (`bool`, defaults to `True`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs
**kwargs_shapes (`Dict`):
Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.
Expand Down Expand Up @@ -281,9 +281,6 @@ class StoreAttr(object):
possible_synonyms = ""
logger.info(f"Automatic task detection to {task}{possible_synonyms}.")

synonyms_for_task = TasksManager.synonyms_for_task(task)
synonyms_for_task.add(task)

task_support_stateful = ensure_export_task_support_stateful(task)
if stateful and not task_support_stateful:
stateful = False
Expand Down
21 changes: 2 additions & 19 deletions optimum/exporters/openvino/stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,6 @@ def model_has_input_output_name(ov_model: ov.Model, name: str):
return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], [])


def model_has_input(ov_model: ov.Model, name: str):
"""
Helper function for checking that model has specified input name
Parameters:
ov_model (ov.Model):
openvino model
name (str):
name of input
Returns:
True if input with requested name exists else False
"""
return name in sum([list(t.get_names()) for t in ov_model.inputs], [])


def fuse_cache_reorder(
ov_model: ov.Model, not_kv_inputs: List[str], key_value_input_names: List[str], gather_dim: int
):
Expand Down Expand Up @@ -200,9 +184,8 @@ def ensure_stateful_is_available(warn=True):


def ensure_export_task_support_stateful(task: str):
synonyms_for_task = TasksManager.synonyms_for_task(task)
synonyms_for_task.add(task)
return "text-generation-with-past" in synonyms_for_task
task = TasksManager.map_from_synonym(task)
return task == "text-generation-with-past"


def patch_stateful(config: PretrainedConfig, ov_model: ov.Model):
Expand Down
5 changes: 1 addition & 4 deletions tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,10 +497,7 @@ def test_compare_to_transformers(self, model_arch):
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True)
self.assertIsInstance(ov_model.config, PretrainedConfig)
self.assertTrue(ov_model.use_cache)
if self.IS_SUPPORT_STATEFUL and model_arch != "gpt_bigcode":
self.assertTrue(ov_model.stateful)
else:
self.assertFalse(ov_model.stateful)
self.assertEqual(ov_model.stateful, self.IS_SUPPORT_STATEFUL and model_arch != "gpt_bigcode")
transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokens = tokenizer(
Expand Down
9 changes: 3 additions & 6 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,12 +300,9 @@ def test_ovmodel_stateful_load_with_compressed_weights(self, model_cls, model_ty
self.assertTrue(model.stateful)
self.assertTrue(model.use_cache)

models = [model]

expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
for i, model in enumerate(models):
_, num_int8, _ = get_num_quantized_nodes(model)
self.assertEqual(expected_ov_int8[i], num_int8)
expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type][0]
_, num_int8, _ = get_num_quantized_nodes(model)
self.assertEqual(expected_ov_int8, num_int8)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type):
Expand Down

0 comments on commit b5e87b2

Please sign in to comment.