From b2a009b6bbf8353172f5a1dd9c29ea1f67610c02 Mon Sep 17 00:00:00 2001
From: Pu Fanyi
Date: Mon, 15 Jul 2024 19:12:25 -0700
Subject: [PATCH] if no response directly return 0 (#142)

---
 lmms_eval/tasks/live_bench/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lmms_eval/tasks/live_bench/utils.py b/lmms_eval/tasks/live_bench/utils.py
index f83812936..ae0b1ac4c 100644
--- a/lmms_eval/tasks/live_bench/utils.py
+++ b/lmms_eval/tasks/live_bench/utils.py
@@ -165,8 +165,8 @@ def livebench_process_results(doc, results):
     criteria = doc["criteria"]
     if subtask not in SUBTASKS:
         subtask = "further insights"
-    if not results:
-        return {"gpt4_eval_score": {"rating": -1, "explanation": "No response", "model_name": "N/A", "subtask": subtask}}
+    if not results or results[0] == "":
+        return {"gpt4_eval_score": {"rating": 0, "explanation": "No response", "model_name": "N/A", "subtask": subtask}}
     rating, explanation, model_name = get_chat_response(base64_images=base64_images, question=doc["question"], ground_truth_answer=doc["answer"], answer=results[0] if results else "", criteria=criteria)
     if rating >= 0:
         return {"gpt4_eval_score": {"rating": rating, "explanation": explanation, "model_name": model_name, "subtask": subtask, "id": doc["id"]}}