Rename the imo_solutions_only eval to imo_exact_answers.

The samples file moves unchanged (similarity index 100%). The registry YAML is
re-created under the new name with an updated description, and the old
registry YAML is deleted. Each registry file carries two top-level keys: the
eval base name (pointing at the versioned id) and the versioned id itself
(pointing at the Match class and the samples path).

diff --git a/evals/registry/data/imo_solutions_only/samples.jsonl b/evals/registry/data/imo_exact_answers/samples.jsonl
similarity index 100%
rename from evals/registry/data/imo_solutions_only/samples.jsonl
rename to evals/registry/data/imo_exact_answers/samples.jsonl
diff --git a/evals/registry/evals/imo_exact_answers.yaml b/evals/registry/evals/imo_exact_answers.yaml
new file mode 100644
index 0000000000..f02fa6a5d5
--- /dev/null
+++ b/evals/registry/evals/imo_exact_answers.yaml
@@ -0,0 +1,9 @@
+imo_exact_answers:
+  id: imo_exact_answers.dev.v0
+  description: A small set of IMO problems that have exact answers (e.g. yes/no, number, fraction).
+  metrics: [accuracy]
+
+imo_exact_answers.dev.v0:
+  class: evals.elsuite.basic.match:Match
+  args:
+    samples_jsonl: imo_exact_answers/samples.jsonl
diff --git a/evals/registry/evals/imo_solutions_only.yaml b/evals/registry/evals/imo_solutions_only.yaml
deleted file mode 100644
index 5e2b8e2972..0000000000
--- a/evals/registry/evals/imo_solutions_only.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-imo_solutions_only:
-  id: imo_solutions_only.dev.v0
-  description: A small set of IMO problems that have yes/no, numeric solutions as answers.
-  metrics: [accuracy]
-
-imo_solutions_only.dev.v0:
-  class: evals.elsuite.basic.match:Match
-  args:
-    samples_jsonl: imo_solutions_only/samples.jsonl