diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 68d737fe74..f5db8feec2 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -359,7 +359,7 @@ def forward(
             inputs["attention_mask"] = np.array(attention_mask)
 
         # Run inference
-        self.request.start_async(inputs, shared_memory=True)
+        self.request.start_async(inputs, share_inputs=True)
         self.request.wait()
         logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device)
 
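
Note (not part of the patch): this change tracks the OpenVINO Python API update in which the `shared_memory` keyword of `InferRequest.start_async()` was replaced by `share_inputs` (OpenVINO 2023.1+). A minimal sketch of the new call, assuming OpenVINO >= 2023.1; the model path and input name below are placeholders, not taken from this repository:

    import numpy as np
    from openvino.runtime import Core

    core = Core()
    compiled = core.compile_model("model.xml", "CPU")  # placeholder IR file
    request = compiled.create_infer_request()

    inputs = {"input_ids": np.ones((1, 8), dtype=np.int64)}  # placeholder input name/shape

    # share_inputs=True asks the runtime to reuse the provided numpy buffers
    # instead of copying them, the behaviour previously requested with
    # shared_memory=True.
    request.start_async(inputs, share_inputs=True)
    request.wait()
    logits = request.get_tensor(compiled.outputs[0]).data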