Skip to content

Commit

Permalink
update after rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
eaidova committed Nov 13, 2024
1 parent 0e67735 commit 125360f
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
10 changes: 3 additions & 7 deletions optimum/intel/openvino/modeling_visual_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def __init__(
ov_config=ov_config,
model_save_dir=model_save_dir,
quantization_config=quantization_config,
compile=not self._compile_only and enable_compilation,
compile=self._compile_only or enable_compilation,
compile_only=self._compile_only,
)
self.vision_embeddings = OVVisionEmbedding(self.vision_embeddings_model, self)
Expand Down Expand Up @@ -648,11 +648,8 @@ def forward(
position_ids=None,
image_bound=None,
tgt_sizes=None,
images=None,
**kwargs,
):
if pixel_values is None and images is not None:
pixel_values = images
inputs_embeds, attention_mask, position_ids = self.get_multimodal_embeddings(
input_ids,
pixel_values,
Expand Down Expand Up @@ -756,7 +753,6 @@ def prepare_inputs_for_generation(
"image_sizes": image_sizes,
"image_bound": kwargs.get("image_bound"),
"tgt_sizes": kwargs.get("tgt_sizes"),
"images": kwargs.get("images"),
}
)
return model_inputs
Expand All @@ -780,7 +776,7 @@ def preprocess_inputs(

class _OVLlavaForCausalLM(OVModelForVisualCausalLM):
auto_model_class = LlavaForConditionalGeneration

def __init__(
self,
language_model: ov.Model,
Expand Down Expand Up @@ -1849,7 +1845,7 @@ def preprocess_inputs(
attention_mask = torch.ones_like(input_ids, dtype=torch.int64)
result = {"input_ids": input_ids, "attention_mask": attention_mask}
if image is not None:
result["images"] = torch.unsqueeze(processor(images=image, return_tensors="pt")["pixel_values"][0], 0)
result["pixel_values"] = processor(images=[image], return_tensors="pt")["pixel_values"]
return result


Expand Down
14 changes: 11 additions & 3 deletions tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import unittest
from pathlib import Path
from typing import Dict
import copy

import numpy as np
import open_clip
Expand Down Expand Up @@ -1997,6 +1998,7 @@ def test_compare_to_transformers(self, model_arch):
self.assertIsInstance(getattr(ov_model, additional_part), MODEL_PARTS_CLS_MAPPING[additional_part])
self.assertIsInstance(ov_model.config, PretrainedConfig)
inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
transformers_inputs = copy.deepcopy(inputs)
ov_model.to("AUTO")
self.assertTrue("AUTO" in ov_model._device)
self.assertTrue("AUTO" in ov_model.vision_embeddings._device)
Expand Down Expand Up @@ -2029,11 +2031,17 @@ def test_compare_to_transformers(self, model_arch):
self.assertTrue("CPU" in getattr(ov_model, additional_part)._device)
self.assertTrue(getattr(ov_model, additional_part).request is None)

# nanollava names its pixel_values input "images"
if model_arch == "nanollava":
pixel_values = transformers_inputs.pop("pixel_values", None)
transformers_inputs["images"] = pixel_values
# the PyTorch minicpmv and internvl2 models are not designed to be called via forward
if model_arch in ["minicpmv", "internvl2"]:
if model_arch not in ["minicpmv", "internvl2"]:
set_seed(SEED)
ov_outputs = ov_model(**inputs)
set_seed(SEED)
with torch.no_grad():
transformers_outputs = transformers_model(**inputs)
transformers_outputs = transformers_model(**transformers_inputs)
self.assertTrue(
torch.allclose(ov_outputs.logits, transformers_outputs.logits, atol=1e-4),
f"Max abs diff {(torch.abs(ov_outputs.logits - transformers_outputs.logits).max())}",
Expand All @@ -2053,7 +2061,7 @@ def test_compare_to_transformers(self, model_arch):
ov_outputs = ov_model.generate(**inputs, generation_config=gen_config)
set_seed(SEED)
with torch.no_grad():
transformers_outputs = transformers_model.generate(**inputs, generation_config=gen_config)
transformers_outputs = transformers_model.generate(**transformers_inputs, generation_config=gen_config)

# the original minicpmv and internvl implementations always skip input tokens in generation results, while the transformers-based approach includes them
if model_arch in ["minicpmv", "internvl2"]:
Expand Down

0 comments on commit 125360f

Please sign in to comment.