diff --git a/lmms_eval/tasks/__init__.py b/lmms_eval/tasks/__init__.py index e6e25d857..dc77ed47f 100644 --- a/lmms_eval/tasks/__init__.py +++ b/lmms_eval/tasks/__init__.py @@ -90,9 +90,10 @@ def include_task_folder(task_dir: str, register_task: bool = True) -> None: # the user defines the appropriate verbosity. except ModuleNotFoundError as e: eval_logger.debug(f"{yaml_path}: {e}. Config will not be added to registry.") + print(f"{yaml_path}: {e}. Config will not be added to registry.") except Exception as error: import traceback - + eval_logger.debug(f"Failed to load config in {yaml_path}. Config will not be added to registry\n" f"Error: {error}\n" f"Traceback: {traceback.format_exc()}") return 0 diff --git a/lmms_eval/tasks/mmbench/_default_template_mmbench.yaml b/lmms_eval/tasks/mmbench/_default_template_mmbench.yaml deleted file mode 100644 index 8520cc2c9..000000000 --- a/lmms_eval/tasks/mmbench/_default_template_mmbench.yaml +++ /dev/null @@ -1,12 +0,0 @@ -dataset_path: lmms-lab/MMBench -dataset_kwargs: - token: True -generation_kwargs: - until: - - "ASSISTANT:" - max_new_tokens: 1024 - temperature: 0 - top_p: 0 - num_beams: 1 - do_sample: false -doc_to_target: "answer" diff --git a/lmms_eval/tasks/mmbench/_default_template_mmbench_cn.yaml b/lmms_eval/tasks/mmbench/_default_template_mmbench_cn.yaml index 31f644b1f..81094620b 100644 --- a/lmms_eval/tasks/mmbench/_default_template_mmbench_cn.yaml +++ b/lmms_eval/tasks/mmbench/_default_template_mmbench_cn.yaml @@ -1,11 +1,11 @@ dataset_path: lmms-lab/MMBench dataset_kwargs: token: True +doc_to_target: "answer" dataset_name: "cn" output_type: generate_until doc_to_visual: !function cn_utils.mmbench_doc_to_visual doc_to_text: !function cn_utils.mmbench_doc_to_text -doc_to_target: "answer" generation_kwargs: max_new_tokens: 256 temperature: 0 @@ -13,11 +13,6 @@ generation_kwargs: num_beams: 1 do_sample: false process_results: !function cn_utils.mmbench_process_results -metadata: - version: 0.0 - gpt_eval_model_name: "gpt-3.5-turbo" - quick_extract: true - sys_prompt: "有如下几个选项:" model_specific_prompt_kwargs: default: pre_prompt: "" @@ -25,4 +20,3 @@ model_specific_prompt_kwargs: model_specific_generation_kwargs: llava: image_aspect_ratio: original -include: _default_template_mmbench.yaml diff --git a/lmms_eval/tasks/mmbench/_default_template_mmbench_en.yaml b/lmms_eval/tasks/mmbench/_default_template_mmbench_en.yaml index b885a6b57..ab2b882c8 100644 --- a/lmms_eval/tasks/mmbench/_default_template_mmbench_en.yaml +++ b/lmms_eval/tasks/mmbench/_default_template_mmbench_en.yaml @@ -1,3 +1,7 @@ +dataset_path: lmms-lab/MMBench +dataset_kwargs: + token: True +doc_to_target: "answer" model_specific_prompt_kwargs: default: pre_prompt: "" @@ -10,5 +14,12 @@ model_specific_generation_kwargs: llava: image_aspect_ratio: original output_type: generate_until -include: _default_template_mmbench.yaml dataset_name: "en" +generation_kwargs: + until: + - "ASSISTANT:" + max_new_tokens: 1024 + temperature: 0 + top_p: 0 + num_beams: 1 + do_sample: false diff --git a/lmms_eval/tasks/mmbench/mmbench_cn.yaml b/lmms_eval/tasks/mmbench/mmbench_cn.yaml index a587a6563..6232531c4 100644 --- a/lmms_eval/tasks/mmbench/mmbench_cn.yaml +++ b/lmms_eval/tasks/mmbench/mmbench_cn.yaml @@ -3,3 +3,8 @@ task: - mmbench_cn_dev - mmbench_cn_test - mmbench_cn_cc +metadata: + version: 0.0 + gpt_eval_model_name: "gpt-3.5-turbo" + quick_extract: true + sys_prompt: "有如下几个选项:"