diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index ff863854d0..2cf1fd466c 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -320,6 +320,6 @@ def main_export(
         input_shapes=input_shapes,
         device=device,
         fp16=fp16,
-        load_in_8bit=int8,
+        int8=int8,
         model_kwargs=model_kwargs,
     )
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index 3cf74a328e..04b7fb642f 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -75,7 +75,7 @@ def export(
     input_shapes: Optional[Dict] = None,
     model_kwargs: Optional[Dict[str, Any]] = None,
     fp16: bool = False,
-    load_in_8bit: bool = False,
+    int8: bool = False,
 ) -> Tuple[List[str], List[str]]:
     """
     Exports a Pytorch or TensorFlow model to an OpenVINO Intermediate Representation.
@@ -118,7 +118,7 @@
             input_shapes=input_shapes,
             model_kwargs=model_kwargs,
             fp16=fp16,
-            load_in_8bit=load_in_8bit,
+            int8=int8,
         )

     elif is_tf_available() and issubclass(type(model), TFPreTrainedModel):
@@ -173,7 +173,7 @@ def export_pytorch_via_onnx(
     input_shapes: Optional[Dict] = None,
     model_kwargs: Optional[Dict[str, Any]] = None,
     fp16: bool = False,
-    load_in_8bit: bool = False,
+    int8: bool = False,
 ):
     """
     Exports a PyTorch model to an OpenVINO Intermediate Representation via ONNX export.
@@ -216,7 +216,7 @@ def export_pytorch_via_onnx(
         ov_model,
         output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output,
         compress_to_fp16=fp16,
-        load_in_8bit=load_in_8bit,
+        load_in_8bit=int8,
     )
     return input_names, output_names, True

@@ -230,7 +230,7 @@ def export_pytorch(
     input_shapes: Optional[Dict] = None,
     model_kwargs: Optional[Dict[str, Any]] = None,
     fp16: bool = False,
-    load_in_8bit: bool = False,
+    int8: bool = False,
 ) -> Tuple[List[str], List[str]]:
     """
     Exports a PyTorch model to an OpenVINO Intermediate Representation.
@@ -325,7 +325,7 @@ def ts_patched_forward(*args, **kwargs):
             ov_model = convert_model(model, example_input=dummy_inputs, input=input_info)
         except Exception as ex:
             logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX")
-            return export_pytorch_via_onnx(model, config, opset, output, device, input_shapes, model_kwargs, fp16=fp16, load_in_8bit=load_in_8bit)
+            return export_pytorch_via_onnx(model, config, opset, output, device, input_shapes, model_kwargs, fp16=fp16, int8=int8)
         ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs}
         ordered_input_names = list(inputs)
         flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values())
@@ -346,7 +346,7 @@ def ts_patched_forward(*args, **kwargs):
             inp_tensor.get_node().set_partial_shape(static_shape)
             inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype))
         ov_model.validate_nodes_and_infer_types()
-        _save_model(ov_model, output, compress_to_fp16=fp16, load_in_8bit=load_in_8bit)
+        _save_model(ov_model, output, compress_to_fp16=fp16, load_in_8bit=int8)
         clear_class_registry()
         del model
         gc.collect()
@@ -364,7 +364,7 @@ def export_models(
     input_shapes: Optional[Dict] = None,
     model_kwargs: Optional[Dict[str, Any]] = None,
     fp16: bool = False,
-    load_in_8bit: bool = False,
+    int8: bool = False,
 ) -> Tuple[List[List[str]], List[List[str]]]:
     """
     Export the models to OpenVINO IR format
@@ -410,7 +410,7 @@ def export_models(
                 input_shapes=input_shapes,
                 model_kwargs=model_kwargs,
                 fp16=fp16,
-                load_in_8bit=load_in_8bit,
+                int8=int8,
             )
         )

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index c477d487a2..d898d5016c 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -219,6 +219,7 @@ def _from_transformers(
         local_files_only: bool = False,
         task: Optional[str] = None,
         trust_remote_code: bool = False,
+        # load_in_8bit: bool = False,  # TODO : add int8
         **kwargs,
     ):
         """
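
For reference, a minimal sketch of the expected call after this rename, assuming `main_export` itself exposes an `int8` flag (the `int8=int8` forwarding in the first hunk suggests it does); the model id and output directory below are placeholders:

```python
from optimum.exporters.openvino import main_export

# Hypothetical invocation: export a model to OpenVINO IR with 8-bit weight
# compression. The exporter-level keyword is now `int8`; only the internal
# `_save_model` helper still receives it as `load_in_8bit=int8`.
main_export(
    model_name_or_path="gpt2",   # placeholder model id
    output="ov_model_int8",      # placeholder output directory
    fp16=False,
    int8=True,
)
```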