diff --git a/lmms_eval/models/qwen2_vl.py b/lmms_eval/models/qwen2_vl.py
index a7ac04f1..ffef9d3d 100755
--- a/lmms_eval/models/qwen2_vl.py
+++ b/lmms_eval/models/qwen2_vl.py
@@ -38,7 +38,7 @@ def __init__(
         batch_size: Optional[Union[int, str]] = 1,
         use_cache=True,
         use_flash_attention_2: Optional[bool] = False,
-        max_pixels: int = 12845056,
+        max_pixels: int = 1605632,
         min_pixels: int = 3136,
         max_num_frames: int = 32,
         **kwargs,
diff --git a/lmms_eval/tasks/mmmu/getdata.py b/lmms_eval/tasks/mmmu/getdata.py
new file mode 100644
index 00000000..486cbfc6
--- /dev/null
+++ b/lmms_eval/tasks/mmmu/getdata.py
@@ -0,0 +1,16 @@
+"""Merge the MMMU dev and validation splits and push the result to the Hub."""
+import datasets
+from datasets import load_dataset
+
+
+def gen():
+    """Yield every "dev" example, then every "validation" example, of lmms-lab/MMMU."""
+    data = load_dataset("lmms-lab/MMMU")
+    yield from data["dev"]
+    yield from data["validation"]
+
+
+if __name__ == "__main__":
+    # Guarded so that merely importing this module never downloads or uploads anything.
+    final_data = datasets.Dataset.from_generator(gen)
+    final_data.push_to_hub("pufanyi/MMMU", split="validation")
diff --git a/lmms_eval/tasks/mmmu/mmmu_dev_val.yaml b/lmms_eval/tasks/mmmu/mmmu_dev_val.yaml
new file mode 100755
index 00000000..aefd46d3
--- /dev/null
+++ b/lmms_eval/tasks/mmmu/mmmu_dev_val.yaml
@@ -0,0 +1,16 @@
+dataset_path: pufanyi/MMMU
+task: "mmmu_dev_val"
+test_split: validation
+output_type: generate_until
+doc_to_visual: !function utils.mmmu_doc_to_visual
+doc_to_text: !function utils.mmmu_doc_to_text
+doc_to_target: "answer"
+# The return value of process_results will be used by metrics
+process_results: !function utils.mmmu_process_results
+
+metric_list:
+  - metric: mmmu_acc
+    aggregation: !function utils.mmmu_aggregate_results
+    higher_is_better: true
+
+include: _default_template_yaml
\ No newline at end of file