Add the imo_exact_answers eval.

Two new files: a Git LFS pointer for the samples data file, and the registry
entry that wires those samples to the basic Includes eval class.

diff --git a/evals/registry/data/imo_exact_answers/samples.jsonl b/evals/registry/data/imo_exact_answers/samples.jsonl
new file mode 100644
index 0000000000..ca5a5dac4a
--- /dev/null
+++ b/evals/registry/data/imo_exact_answers/samples.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc97a4694cf576bf00540fae940906f5c63547fe421210738079072801e70b8f
+size 18492
diff --git a/evals/registry/evals/imo_exact_answers.yaml b/evals/registry/evals/imo_exact_answers.yaml
new file mode 100644
index 0000000000..4d72c0e5cf
--- /dev/null
+++ b/evals/registry/evals/imo_exact_answers.yaml
@@ -0,0 +1,9 @@
+imo_exact_answers:
+  id: imo_exact_answers.dev.v0
+  description: A small set of IMO problems that have exact answers (e.g. yes/no, number, fraction).
+  metrics: [accuracy]
+
+imo_exact_answers.dev.v0:
+  class: evals.elsuite.basic.includes:Includes
+  args:
+    samples_jsonl: imo_exact_answers/samples.jsonl