"""Helpers for the ``gqa-ru`` task (``deepvk/GQA-ru`` dataset).

The task config refers to these callables as
``!function utils.gqa_doc_to_visual`` and ``!function utils.gqa_doc_to_text``.
"""

# Module-level cache, filled on the first call to ``gqa_doc_to_visual``.
GQA_RAW_IMAGE_DATASET = None  # the loaded "testdev_balanced_images" split
GQA_ID2IMAGE = None  # maps image id -> image converted to RGB


def _load_gqa_images():
    """Load the image split and return ``(dataset, id_to_rgb_image)``."""
    # Function-scope import: importing this module then does not require
    # ``datasets`` until images are actually requested.
    from datasets import load_dataset

    # token=True asks ``datasets`` to authenticate with the locally stored
    # Hugging Face credentials.
    dataset = load_dataset(
        "deepvk/GQA-ru",
        "testdev_balanced_images",
        split="testdev",
        token=True,
    )
    id_to_image = {row["id"]: row["image"].convert("RGB") for row in dataset}
    return dataset, id_to_image


def gqa_doc_to_visual(doc: dict) -> list:
    """Return the image belonging to *doc* as a one-element list.

    On first use the whole image split is loaded and every image is converted
    to RGB; later calls are plain dictionary lookups. The cached image object
    itself is returned (no copy is made).

    Args:
        doc: A dataset row; its ``"imageId"`` value selects the image.

    Returns:
        ``[image]`` for the image whose ``"id"`` equals ``doc["imageId"]``.

    Raises:
        KeyError: If ``doc`` has no ``"imageId"`` key, or if that id is not
            present in the image split.
    """
    global GQA_RAW_IMAGE_DATASET
    global GQA_ID2IMAGE
    if GQA_ID2IMAGE is None:
        # Build into locals and publish only after everything succeeded, so a
        # failure part-way through cannot leave a half-filled cache behind
        # that later calls would silently trust.
        dataset, id_to_image = _load_gqa_images()
        GQA_RAW_IMAGE_DATASET = dataset
        GQA_ID2IMAGE = id_to_image

    image_id = doc["imageId"]
    try:
        image = GQA_ID2IMAGE[image_id]
    except KeyError:
        raise KeyError(
            f"imageId {image_id!r} not found in deepvk/GQA-ru "
            "(testdev_balanced_images, split 'testdev')"
        ) from None
    return [image]


def gqa_doc_to_text(doc: dict, model_specific_prompt_kwargs: "dict | None" = None) -> str:
    """Build the text prompt for *doc*.

    Args:
        doc: A dataset row; its ``"question"`` value is the prompt core.
        model_specific_prompt_kwargs: Optional mapping that may provide
            ``"pre_prompt"`` and ``"post_prompt"`` strings. ``None`` or a
            missing key is treated as an empty string.

    Returns:
        The pre-prompt, the question and the post-prompt concatenated in that
        order, with nothing inserted between them.

    Raises:
        KeyError: If ``doc`` has no ``"question"`` key.
    """
    prompt_kwargs = model_specific_prompt_kwargs or {}
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    question = doc["question"]
    return f"{pre_prompt}{question}{post_prompt}"