Skip to content

Commit

Permalink
Mmvetv2 (#458)
Browse files Browse the repository at this point in the history
* add task MMVet-v2

* Fix lint warnings using pre-commit

* add comment for mmvetv2_group_img.yaml

* fix images_tokens format
  • Loading branch information
frankRenlf authored Dec 13, 2024
1 parent 00a8422 commit d8f1f9c
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 29 deletions.
1 change: 1 addition & 0 deletions lmms_eval/tasks/mmvetv2/mmvetv2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ metric_list:
metadata:
version: 0.0
gpt_eval_model_name: "gpt-4-0613"
interleaved_format: false
lmms_eval_specific_kwargs:
default:
pre_prompt: "First please perform reasoning, and think step by step to provide best answer to the following question: \n\n"
Expand Down
1 change: 1 addition & 0 deletions lmms_eval/tasks/mmvetv2/mmvetv2_group_img.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ metric_list:
metadata:
version: 0.0
gpt_eval_model_name: "gpt-4-0613"
interleaved_format: false
lmms_eval_specific_kwargs:
default:
pre_prompt: "First please perform reasoning, and think step by step to provide best answer to the following question: \n\n"
Expand Down
40 changes: 11 additions & 29 deletions lmms_eval/tasks/mmvetv2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,48 +138,30 @@ def process_images(images, size=1008):
return concat_horizontal


def get_images_tokens(input_string):
    """Return the image file names embedded in an ``<IMG>``-delimited prompt.

    The prompt is split on the literal ``<IMG>`` marker; every resulting
    segment is whitespace-stripped, and the segments that end in ``.jpg``,
    ``.png`` or ``.jpeg`` are kept, in their original order.

    Args:
        input_string: Prompt text, e.g. ``"Compare <IMG>a.jpg<IMG> to ..."``.

    Returns:
        A list of the stripped segments that look like image file names
        (empty when the prompt contains none).
    """
    segments = (segment.strip() for segment in input_string.split("<IMG>"))
    # The extension check is case-sensitive, exactly like the original loop.
    return [segment for segment in segments if segment.endswith((".jpg", ".png", ".jpeg"))]


def mmvet_group_img_doc_to_visual(doc):
    """Collect the images referenced by a question and merge them into one visual.

    Every ``<image_N>`` token found in ``doc["question"]`` is mapped to the
    ``doc`` entry named ``image_N``; each entry is converted to RGB and the
    resulting list is handed to ``process_images``.

    Args:
        doc: Dataset record holding the ``"question"`` text and one
            ``image_N`` entry per token (entries must expose ``.convert``;
            presumably PIL images -- confirm against the dataset loader).

    Returns:
        A single-element list containing whatever ``process_images`` returns
        for the referenced images.

    Raises:
        KeyError: If the question references an ``image_N`` missing from ``doc``.
    """
    prompt = doc["question"]
    # Tokens look like "<image_3>"; the matching doc key is the bare "image_3".
    image_tokens = re.findall(r"<image_\d+>", prompt)
    images = [doc[image_token.strip("<>")].convert("RGB") for image_token in image_tokens]
    return [process_images(images)]


def mmvet_doc_to_visual(doc):
    """Return the RGB images referenced by ``<image_N>`` tokens in the question.

    Args:
        doc: Dataset record holding the ``"question"`` text and one
            ``image_N`` entry per token (entries must expose ``.convert``;
            presumably PIL images -- confirm against the dataset loader).

    Returns:
        A list with one RGB-converted image per ``<image_N>`` token, in the
        order the tokens appear in the question (repeated tokens yield
        repeated images). Empty when the question has no such token.

    Raises:
        KeyError: If the question references an ``image_N`` missing from ``doc``.
    """
    prompt = doc["question"]
    # Tokens look like "<image_3>"; the matching doc key is the bare "image_3".
    image_tokens = re.findall(r"<image_\d+>", prompt)
    return [doc[image_token.strip("<>")].convert("RGB") for image_token in image_tokens]


def replace_images_tokens(input_string):
    """Normalise the image markers inside a question string.

    When ``config["metadata"]["interleaved_format"]`` is truthy, every
    numbered ``<image_N>`` token is rewritten to the generic ``<image>``
    placeholder. The same ``<image_\\d+>`` pattern used by the
    ``*_doc_to_visual`` helpers is applied here, so the number of
    placeholders matches the number of images those helpers return
    (the previous loop only covered indices 0-17).

    Afterwards every literal ``<IMG>`` delimiter is removed from the text.

    Args:
        input_string: Raw question text.

    Returns:
        The question text with the markers rewritten as described above.
    """
    if config["metadata"]["interleaved_format"]:
        input_string = re.sub(r"<image_\d+>", "<image>", input_string)
    # Equivalent to "".join(input_string.split("<IMG>")): drop the delimiters
    # and keep everything between them untouched.
    return input_string.replace("<IMG>", "")


def doc_to_text(doc, lmms_eval_specific_kwargs=None):
Expand Down

0 comments on commit d8f1f9c

Please sign in to comment.