Skip to content

Commit

Permalink
Merge pull request #180 from abzb1/main
Browse files Browse the repository at this point in the history
[Add Dataset] SEEDBench 2 Plus
  • Loading branch information
Luodian authored Aug 7, 2024
2 parents 3d4884a + ceb1371 commit 82abfa9
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/current_tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
- ScreenSpot REG / Instruction Generation (screenspot_reg)
- SeedBench (seedbench)
- SeedBench 2 (seedbench_2)
- SeedBench 2 Plus (seedbench_2_plus)
- ST-VQA (stvqa)
- TextCaps (textcaps)
- TextCaps Validation (textcaps_val)
Expand Down
2 changes: 1 addition & 1 deletion lmms_eval/tasks/seedbench_2/seedbench_2.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
dataset_path: lmms-lab/SEED-Bench-2
dataset_kwargs:
token: True
task: "seedbench-2"
task: "seedbench_2"
test_split: test
output_type: generate_until
doc_to_visual: !function utils.seed_doc_to_visual
Expand Down
43 changes: 43 additions & 0 deletions lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Task config for SEED-Bench-2-Plus (text-rich visual comprehension: charts, maps, webpages).
dataset_path: doolayer/SEED-Bench-2-Plus
dataset_kwargs:
  token: True
task: "seedbench_2_plus"
test_split: test
output_type: generate_until
doc_to_visual: !function utils.seed_doc_to_visual
doc_to_text: !function utils.seed_doc_to_text
doc_to_target: "answer"
generation_kwargs:
  until:
    - "ASSISTANT:"
  # Answers are single option letters, so a short generation budget suffices.
  max_new_tokens: 16
  image_aspect_ratio: original
# The return value of process_results will be used by metrics
process_results: !function utils.seed_process_result
# Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
metric_list:
  # Per-category accuracies plus an overall score; keys must match those emitted
  # by utils.seed_process_result (seedbench_2_plus_<Category> / _all).
  - metric: seedbench_2_plus_Chart
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
  - metric: seedbench_2_plus_Map
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
  - metric: seedbench_2_plus_Web
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
  - metric: seedbench_2_plus_all
    aggregation: !function utils.seed_aggregation_result
    higher_is_better: true
# NOTE(review): metadata is a list here; other lmms_eval task configs often use a mapping — confirm.
metadata:
  - version: 0.0

model_specific_prompt_kwargs:
  llava:
    img_token: <image>
    post_prompt: "Answer with the option's letter from the given choices directly."
  gpt4V:
    img_token: <image>
    post_prompt: "Answer with the option's letter from the given choices directly."
  default:
    img_token: <image>
    post_prompt: "Answer with the option's letter from the given choices directly."
52 changes: 52 additions & 0 deletions lmms_eval/tasks/seedbench_2_plus/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import json

def seed_doc_to_visual(doc):
    """Return the sample's image as a single-element list, converted to RGB."""
    image = doc["image"]
    return [image.convert("RGB")]

def parse_choice_img(choice: str, img_token: str):
    """Map an image-file choice to the model's image token; pass text through.

    NOTE(review): substring test — a text choice that merely contains "jpg" or
    "png" would also be replaced; presumably choices are either plain text or
    bare file names. Confirm against the dataset.
    """
    looks_like_image_file = ("jpg" in choice) or ("png" in choice)
    return img_token if looks_like_image_file else choice


def seed_doc_to_text(doc, model_specific_kwargs=None):
    """Build the multiple-choice prompt for one SEED-Bench-2-Plus sample.

    Substitutes the model's image token for any ``<img>`` placeholder in the
    question, appends the four choices (A-D, image-file choices rendered as the
    image token via ``parse_choice_img``), and ends with the model-specific
    post-prompt instruction.

    Args:
        doc: sample dict with keys ``question``, ``choice_A``..``choice_D``.
        model_specific_kwargs: dict with ``img_token`` and ``post_prompt``
            (required; supplied by the harness from the task YAML).

    Returns:
        The fully formatted prompt string.
    """
    img_token = model_specific_kwargs["img_token"]
    # BUG FIX: str.replace returns a new string — the original discarded the
    # result, so "<img>" placeholders were never substituted.
    question = doc["question"].replace("<img>", img_token)
    for letter in ("A", "B", "C", "D"):
        choice = parse_choice_img(doc[f"choice_{letter}"], img_token)
        question += f"\n{letter}. {choice}"
    return f"{question}\n{model_specific_kwargs['post_prompt']}"


def seed_process_result(doc, result):
    """Convert one model response into per-category and overall metric records.

    Returns a dict keyed by ``seedbench_2_plus_<Category>`` (category taken
    from the sample's ``question_image_type``, capitalized) and
    ``seedbench_2_plus_all``, each holding pred / answer / question_id.
    """
    response = result[0].strip()
    # Keep only the leading character when the model emitted more than one
    # (e.g. "A." or "A) because ..." -> "A").
    prediction = response[0] if len(response) > 1 else response
    record = {
        "pred": prediction,
        "answer": doc["answer"],
        "question_id": doc["question_id"],
    }
    category = doc["question_image_type"].capitalize()
    return {
        f"seedbench_2_plus_{category}": dict(record),
        "seedbench_2_plus_all": dict(record),
    }


def seed_aggregation_result(results):
    """Return accuracy over result records; 0 for an empty list.

    A record counts as correct when its stripped, lowercased ``pred`` equals
    its stripped, lowercased ``answer``.
    """
    if not results:
        return 0
    correct = sum(
        1
        for record in results
        if record["pred"].lower().strip() == record["answer"].lower().strip()
    )
    return correct / len(results)


def seed_aggregation_result_all(results):
    """Compute overall accuracy and dump all predictions for submission.

    Writes ``./seed_submission.json`` (one {question_id, prediction} entry per
    record) as a side effect, then returns the accuracy from
    ``seed_aggregation_result``.
    """
    score = seed_aggregation_result(results)
    submission = [
        {"question_id": record["question_id"], "prediction": record["pred"]}
        for record in results
    ]
    with open("./seed_submission.json", "w") as f:
        json.dump(submission, f, indent=4)
    print("Storing files for seed_submission ...")
    return score

0 comments on commit 82abfa9

Please sign in to comment.