diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 6ce7a658c3..7a98f13e1c 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -367,9 +367,9 @@ def _llama_gemma_update_causal_mask_legacy(self, attention_mask, input_tensor, c
                 offset = 0
             mask_shape = attention_mask.shape
             mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
-            causal_mask[
-                : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
-            ] = mask_slice
+            causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
+                mask_slice
+            )
 
     if (
         self.config._attn_implementation == "sdpa"
@@ -1640,9 +1640,9 @@ def _dbrx_update_causal_mask_legacy(
                 offset = 0
             mask_shape = attention_mask.shape
             mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
-            causal_mask[
-                : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
-            ] = mask_slice
+            causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
+                mask_slice
+            )
 
     if (
         self.config._attn_implementation == "sdpa"
diff --git a/optimum/intel/neural_compressor/quantization.py b/optimum/intel/neural_compressor/quantization.py
index ea8eb024ae..a345d90a03 100644
--- a/optimum/intel/neural_compressor/quantization.py
+++ b/optimum/intel/neural_compressor/quantization.py
@@ -25,10 +25,10 @@
 import torch
 from datasets import Dataset, load_dataset
 from neural_compressor.config import PostTrainingQuantConfig
-from neural_compressor.utils.export import torch_to_int8_onnx
 from neural_compressor.model.onnx_model import ONNXModel
 from neural_compressor.model.torch_model import IPEXModel, PyTorchModel
 from neural_compressor.quantization import fit
+from neural_compressor.utils.export import torch_to_int8_onnx
 from packaging.version import parse
 from torch.utils.data import DataLoader, RandomSampler
 from transformers import (
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 352c95fc84..1669cb8143 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -571,9 +571,11 @@ def _expand_outputs_for_generation(self, indicies, logits: torch.Tensor, past_ke
                 ):
                     past_key_values = tuple(
                         tuple(
-                            past_state[indicies]
-                            if not self.config.model_type == "chatglm"
-                            else past_state[:, indicies, ...]
+                            (
+                                past_state[indicies]
+                                if not self.config.model_type == "chatglm"
+                                else past_state[:, indicies, ...]
+                            )
                             for past_state in layer_past
                         )
                         for layer_past in past_key_values