Skip to content

Commit

Permalink
fix yaml and aggregate functions
Browse files Browse the repository at this point in the history
  • Loading branch information
JvThunder committed Mar 15, 2024
1 parent e42aa91 commit 71a5e88
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
11 changes: 7 additions & 4 deletions lmms_eval/tasks/olympiadbench/olympiadbench_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@ generation_kwargs:
process_results: !function utils.olympiadbench_process_results
metric_list:
- metric: submission
aggregation: !function utils.mathvista_aggregate_results
aggregation: !function utils.olympiadbench_aggregation_submissions
higher_is_better: true
- metric: auto_scoring
aggregation: !function utils.auto_scoring
higher_is_better: True
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
process_results: !function utils.olympiadbench_process_results

model_specific_prompt_kwargs:
default:
Expand Down
22 changes: 11 additions & 11 deletions lmms_eval/tasks/olympiadbench/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def olympiadbench_doc_to_text(doc):
final_question = pre_prompt + question + '\n' + post_prompt
return final_question

#TODO
def olympiadbench_process_results(doc, results):
precision = doc["precision"]
answer_type = doc["answer_type"]
Expand All @@ -63,21 +62,22 @@ def olympiadbench_process_results(doc, results):

if answer_type == "Need_human_evaluate":
return {
"human_eval": prediction,
"submission": prediction
}
else:
prediction = prediction.split(" final answer is")
prediction = prediction.replace("\n", "").replace(" ", "").replace(".", "")
olympiadbench_evaluator.judge(prediction, doc["final_answer"][0], precision)
accuracy = olympiadbench_evaluator.judge(prediction, doc["final_answer"][0], precision)
accuracy = int(accuracy)
return {
"eval_score": prediction,
"prediction": prediction
"exact_match": accuracy
}

#TODO
def olympiadbench_aggregation_results(results, metric, args):
pass

def auto_scoring(results, metric, args):
pass
def olympiadbench_aggregation_submissions(results, args):
    """Aggregate per-doc submission entries into a timestamped JSON submission file.

    Args:
        results: list of per-document prediction entries collected by the
            harness (whatever `process_results` returned under the
            "submission" metric); must be JSON-serializable.
        args: harness CLI/config namespace, forwarded to
            `generate_submission_file` to resolve the output directory.

    Returns:
        None. Side effect: writes the JSON file and prints its path.
    """
    # Timestamp format follows the project's existing submission-file convention.
    now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S")
    submission_file_name = f"olympiadbench-test-submission-{now_date_time}.json"
    path = generate_submission_file(submission_file_name, args)
    # Explicit encoding: the bare open(path, "w") relied on the platform
    # default encoding, which is not UTF-8 everywhere (e.g. Windows cp1252).
    with open(path, "w", encoding="utf-8") as f:
        json.dump(results, f)
    print(f"Submission file saved to {path}")

0 comments on commit 71a5e88

Please sign in to comment.