[Add Dataset] SEEDBench 2 Plus #180

Merged · 1 commit · Aug 7, 2024
1 change: 1 addition & 0 deletions docs/current_tasks.md
@@ -105,6 +105,7 @@
 - ScreenSpot REG / Instruction Generation (screenspot_reg)
 - SeedBench (seedbench)
 - SeedBench 2 (seedbench_2)
+- SeedBench 2 Plus (seedbench_2_plus)
 - ST-VQA (stvqa)
 - TextCaps (textcaps)
 - TextCaps Validation (textcaps_val)
2 changes: 1 addition & 1 deletion lmms_eval/tasks/seedbench_2/seedbench_2.yaml
@@ -1,7 +1,7 @@
 dataset_path: lmms-lab/SEED-Bench-2
 dataset_kwargs:
   token: True
-task: "seedbench-2"
+task: "seedbench_2"
 test_split: test
 output_type: generate_until
 doc_to_visual: !function utils.seed_doc_to_visual
Expand Down
43 changes: 43 additions & 0 deletions lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml
@@ -0,0 +1,43 @@
dataset_path: doolayer/SEED-Bench-2-Plus
dataset_kwargs:
  token: True
task: "seedbench_2_plus"
test_split: test
output_type: generate_until
doc_to_visual: !function utils.seed_doc_to_visual
doc_to_text: !function utils.seed_doc_to_text
doc_to_target: "answer"
generation_kwargs:
  until:
    - "ASSISTANT:"
  max_new_tokens: 16
  image_aspect_ratio: original
# The return value of process_results will be used by metrics
process_results: !function utils.seed_process_result
# Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
metric_list:
  - metric: seedbench_2_plus_Chart
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
  - metric: seedbench_2_plus_Map
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
  - metric: seedbench_2_plus_Web
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
  - metric: seedbench_2_plus_all
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
metadata:
  - version: 0.0

model_specific_prompt_kwargs:
  llava:
    img_token: <image>
    post_prompt: "Answer with the option's letter from the given choices directly."
  gpt4V:
    img_token: <image>
    post_prompt: "Answer with the option's letter from the given choices directly."
  default:
    img_token: <image>
    post_prompt: "Answer with the option's letter from the given choices directly."
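For orientation, here is a minimal usage sketch (not part of the diff) showing how the model_specific_prompt_kwargs above are consumed by seed_doc_to_text from the new utils.py below. The doc dict is a hypothetical SEED-Bench-2-Plus row with illustrative values; run it from lmms_eval/tasks/seedbench_2_plus/ so utils.py is importable.

# Minimal usage sketch, not part of the PR. All field values are hypothetical.
from utils import seed_doc_to_text

doc = {
    "question": "What does the highlighted region of the <img> show?",
    "choice_A": "Revenue",
    "choice_B": "inset.png",  # image-file choices are replaced by the img_token
    "choice_C": "Cost",
    "choice_D": "Profit",
}
kwargs = {
    "img_token": "<image>",
    "post_prompt": "Answer with the option's letter from the given choices directly.",
}
print(seed_doc_to_text(doc, kwargs))
# The <img> placeholder becomes <image>, choice B is rendered as <image>,
# and the post_prompt is appended on the final line.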
52 changes: 52 additions & 0 deletions lmms_eval/tasks/seedbench_2_plus/utils.py
@@ -0,0 +1,52 @@
import json

def seed_doc_to_visual(doc):
return [doc["image"].convert("RGB")]

def parse_choice_img(choice: str, img_token: str):
    # Choices that are image files (e.g. "foo.jpg") are rendered as the image token.
    if "jpg" in choice or "png" in choice:
        return img_token
    return choice


def seed_doc_to_text(doc, model_specific_kwargs=None):
    # Substitute the dataset's <img> placeholder with the model's image token.
    # (str.replace returns a new string, so the result must be reassigned.)
    question = doc["question"].replace("<img>", model_specific_kwargs["img_token"])
    question += "\n" + f"A. {parse_choice_img(doc['choice_A'], model_specific_kwargs['img_token'])}\n"
    question += f"B. {parse_choice_img(doc['choice_B'], model_specific_kwargs['img_token'])}\n"
    question += f"C. {parse_choice_img(doc['choice_C'], model_specific_kwargs['img_token'])}\n"
    question += f"D. {parse_choice_img(doc['choice_D'], model_specific_kwargs['img_token'])}"

    return f"{question}\n{model_specific_kwargs['post_prompt']}"


def seed_process_result(doc, result):
    # Keep only the leading option letter from the model's generation.
    pred = result[0].strip()
    if len(pred) > 1:
        pred = pred[0]
    answer = doc["answer"]
    data_type = doc["question_image_type"].capitalize()
    record = {"pred": pred, "answer": answer, "question_id": doc["question_id"]}

    # Score each sample under its image-type metric and under the overall metric.
    return {f"seedbench_2_plus_{data_type}": record, "seedbench_2_plus_all": record}


def seed_aggregation_result(results):
total_count = 0
total_correct = 0
for result in results:
if result["pred"].lower().strip() == result["answer"].lower().strip():
total_correct += 1
total_count += 1
return total_correct / total_count if total_count != 0 else 0


def seed_aggregation_result_all(results):
    # Same accuracy as above, but also dump predictions for a leaderboard submission.
    score = seed_aggregation_result(results)
    stored_results = []
    for result in results:
        stored_results.append({"question_id": result["question_id"], "prediction": result["pred"]})
    with open("./seed_submission.json", "w") as f:
        json.dump(stored_results, f, indent=4)
    print("Storing files for seed_submission ...")

return score
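To make the metric plumbing concrete, a small round-trip sketch (also not part of the diff): each sample is scored under both its image-type key and the seedbench_2_plus_all key, and the aggregation hook averages exact letter matches. The docs and generations below are hypothetical; run this in the same module so seed_process_result and seed_aggregation_result are in scope.

# Hypothetical docs and model generations, illustrating process_results -> aggregation.
docs = [
    {"answer": "A", "question_image_type": "chart", "question_id": "q1"},
    {"answer": "C", "question_image_type": "map", "question_id": "q2"},
]
generations = [["A. Revenue"], ["B"]]  # raw outputs; only the leading letter is kept

chart_results, all_results = [], []
for doc, gen in zip(docs, generations):
    scored = seed_process_result(doc, gen)
    all_results.append(scored["seedbench_2_plus_all"])
    if "seedbench_2_plus_Chart" in scored:
        chart_results.append(scored["seedbench_2_plus_Chart"])

print(seed_aggregation_result(chart_results))  # 1.0 -- q1 answered correctly
print(seed_aggregation_result(all_results))    # 0.5 -- q2 answered incorrectly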