Skip to content

Commit

Permalink
Add test and train sets to in-loop oe-eval (for ladder work)
Browse files Browse the repository at this point in the history
  • Loading branch information
liujch1998 committed Nov 18, 2024
1 parent 7e81a6c commit 5368d86
Show file tree
Hide file tree
Showing 85 changed files with 541 additions and 0 deletions.
499 changes: 499 additions & 0 deletions olmo/eval/downstream.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_challenge:mc", "task_hash": "11abfade7ecce501f3e3e72c937e19cc", "task_config": {"task_name": "arc_challenge:mc", "task_core": "arc_challenge", "limit": 1000000, "split": "test", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:mc::olmes"}}, "current_date": "2024-11-18 22:05:58 UTC", "num_instances": 1172}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_challenge", "task_hash": "b122d520ab0cf70114350ecf00c5c811", "task_config": {"task_name": "arc_challenge", "task_core": "arc_challenge", "limit": 1000000, "split": "test", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:rc::olmes"}}, "current_date": "2024-11-18 21:50:18 UTC", "num_instances": 1172}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_challenge:mc", "task_hash": "cf2769a2dc6cbea724ff477c3d2543a2", "task_config": {"task_name": "arc_challenge:mc", "task_core": "arc_challenge", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:mc::olmes"}}, "current_date": "2024-11-18 22:05:40 UTC", "num_instances": 1119}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_challenge", "task_hash": "9045ed0bd68a7e9ff34cf51ff24828bf", "task_config": {"task_name": "arc_challenge", "task_core": "arc_challenge", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:rc::olmes"}}, "current_date": "2024-11-18 22:05:31 UTC", "num_instances": 1119}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_challenge:mc", "task_hash": "a673d7761ce3fc3d5061d72f76755971", "task_config": {"task_name": "arc_challenge:mc", "task_core": "arc_challenge", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:mc::olmes"}}, "current_date": "2024-11-18 22:05:49 UTC", "num_instances": 299}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_challenge", "task_hash": "bd181c90c43b3ef799af2f300ea09cf1", "task_config": {"task_name": "arc_challenge", "task_core": "arc_challenge", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:rc::olmes"}}, "current_date": "2024-11-18 21:45:07 UTC", "num_instances": 299}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/arc_easy/test_mc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_easy:mc", "task_hash": "64250ca6fdf0f02e07b539e8efc04922", "task_config": {"task_name": "arc_easy:mc", "task_core": "arc_easy", "limit": 1000000, "split": "test", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Easy", "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"description": "ARC-Easy (MC) using OLMES-v0.1", "regimes": ["OLMES-v0.1"], "alias": "arc_easy:mc::olmes"}}, "current_date": "2024-11-18 22:06:33 UTC", "num_instances": 2376}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/arc_easy/test_rc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_easy", "task_hash": "ccbbd993c851d3300140d81ffec0e397", "task_config": {"task_name": "arc_easy", "task_core": "arc_easy", "limit": 1000000, "split": "test", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {"description": null}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Easy", "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"description": "ARC-Easy (RC) using OLMES-v0.1", "regimes": ["OLMES-v0.1"], "alias": "arc_easy:rc::olmes"}}, "current_date": "2024-11-18 21:50:27 UTC", "num_instances": 2376}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_easy:mc", "task_hash": "afa7e96b485c4e4481b3b9b817faac36", "task_config": {"task_name": "arc_easy:mc", "task_core": "arc_easy", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Easy", "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"description": "ARC-Easy (MC) using OLMES-v0.1", "regimes": ["OLMES-v0.1"], "alias": "arc_easy:mc::olmes"}}, "current_date": "2024-11-18 22:06:16 UTC", "num_instances": 2251}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_easy", "task_hash": "4a5241b308edb45d7b9eab594093c519", "task_config": {"task_name": "arc_easy", "task_core": "arc_easy", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {"description": null}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Easy", "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"description": "ARC-Easy (RC) using OLMES-v0.1", "regimes": ["OLMES-v0.1"], "alias": "arc_easy:rc::olmes"}}, "current_date": "2024-11-18 22:06:07 UTC", "num_instances": 2251}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/arc_easy/val_mc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_easy:mc", "task_hash": "443bd52f752399615d01c853a8d7386c", "task_config": {"task_name": "arc_easy:mc", "task_core": "arc_easy", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Easy", "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"description": "ARC-Easy (MC) using OLMES-v0.1", "regimes": ["OLMES-v0.1"], "alias": "arc_easy:mc::olmes"}}, "current_date": "2024-11-18 22:06:24 UTC", "num_instances": 570}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/arc_easy/val_rc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "arc_easy", "task_hash": "0045e4f588a617cbe9ee5a4ae8ca1ce5", "task_config": {"task_name": "arc_easy", "task_core": "arc_easy", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {"description": null}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:ARC-Easy", "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"description": "ARC-Easy (RC) using OLMES-v0.1", "regimes": ["OLMES-v0.1"], "alias": "arc_easy:rc::olmes"}}, "current_date": "2024-11-18 21:45:15 UTC", "num_instances": 570}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/boolq/train_mc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "boolq:mc", "task_hash": "a92ca849d7efd331110145eb71e4fc09", "task_config": {"task_name": "boolq:mc", "task_core": "boolq", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": null}, "native_id_field": "idx", "fewshot_source": "OLMES:BoolQ", "dataset_path": "super_glue", "dataset_name": "boolq", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "boolq:mc::olmes"}}, "current_date": "2024-11-18 22:06:52 UTC", "num_instances": 9427}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/boolq/train_rc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "boolq", "task_hash": "ec8729b372d310aaf3a222f37a7af7b9", "task_config": {"task_name": "boolq", "task_core": "boolq", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": null}, "native_id_field": "idx", "fewshot_source": "OLMES:BoolQ", "dataset_path": "super_glue", "dataset_name": "boolq", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "boolq:rc::olmes"}}, "current_date": "2024-11-18 22:06:41 UTC", "num_instances": 9427}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/boolq/val_mc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "boolq:mc", "task_hash": "d88f45757f4a8c3802b7274857894a90", "task_config": {"task_name": "boolq:mc", "task_core": "boolq", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": null}, "native_id_field": "idx", "fewshot_source": "OLMES:BoolQ", "dataset_path": "super_glue", "dataset_name": "boolq", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "boolq:mc::olmes"}}, "current_date": "2024-11-18 22:07:01 UTC", "num_instances": 3270}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/boolq/val_rc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "boolq", "task_hash": "8942f8464f48343f6ab2773d4a75d344", "task_config": {"task_name": "boolq", "task_core": "boolq", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": null}, "native_id_field": "idx", "fewshot_source": "OLMES:BoolQ", "dataset_path": "super_glue", "dataset_name": "boolq", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "boolq:rc::olmes"}}, "current_date": "2024-11-18 21:36:50 UTC", "num_instances": 3270}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/csqa/train_mc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "csqa:mc", "task_hash": "a47780e5c1faaccf3586bfc1e5cb020c", "task_config": {"task_name": "csqa:mc", "task_core": "csqa", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:commonsense_qa", "dataset_path": "commonsense_qa", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "csqa:mc::olmes"}}, "current_date": "2024-11-18 22:07:22 UTC", "num_instances": 9741}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/csqa/train_rc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "csqa", "task_hash": "d859c48ca5bd25f69a8a64ad585b4447", "task_config": {"task_name": "csqa", "task_core": "csqa", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:commonsense_qa", "dataset_path": "commonsense_qa", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "csqa:rc::olmes"}}, "current_date": "2024-11-18 22:07:11 UTC", "num_instances": 9741}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/csqa/val_mc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "csqa:mc", "task_hash": "303a106ee47d83c3f9ae7e33bd993f0d", "task_config": {"task_name": "csqa:mc", "task_core": "csqa", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:commonsense_qa", "dataset_path": "commonsense_qa", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "csqa:mc::olmes"}}, "current_date": "2024-11-18 22:07:31 UTC", "num_instances": 1221}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/csqa/val_rc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "csqa", "task_hash": "6246a67a1b7a81aaa134aaae4480f1b9", "task_config": {"task_name": "csqa", "task_core": "csqa", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:commonsense_qa", "dataset_path": "commonsense_qa", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "csqa:rc::olmes"}}, "current_date": "2024-11-18 21:37:21 UTC", "num_instances": 1221}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "hellaswag:mc", "task_hash": "800769140f7d2dbfebbdcb6cabc30f0a", "task_config": {"task_name": "hellaswag:mc", "task_core": "hellaswag", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "ind", "fewshot_source": "OLMES:hellaswag", "dataset_path": "hellaswag", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "hellaswag:mc::olmes"}}, "current_date": "2024-11-18 22:08:27 UTC", "num_instances": 39905}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "hellaswag", "task_hash": "25aa48c8c349d06cf85e1d9667d1c63f", "task_config": {"task_name": "hellaswag", "task_core": "hellaswag", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "ind", "fewshot_source": "OLMES:hellaswag", "dataset_path": "hellaswag", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "hellaswag:rc::olmes"}}, "current_date": "2024-11-18 22:07:53 UTC", "num_instances": 39905}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/hellaswag/val_mc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "hellaswag:mc", "task_hash": "8c78527c6c17a8765b7a0b53353e7266", "task_config": {"task_name": "hellaswag:mc", "task_core": "hellaswag", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "ind", "fewshot_source": "OLMES:hellaswag", "dataset_path": "hellaswag", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "hellaswag:mc::olmes"}}, "current_date": "2024-11-18 22:08:50 UTC", "num_instances": 10042}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/hellaswag/val_rc_5shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "hellaswag", "task_hash": "e735ce12b24a16e9d583e6ee5bfe720f", "task_config": {"task_name": "hellaswag", "task_core": "hellaswag", "limit": 1000000, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "ind", "fewshot_source": "OLMES:hellaswag", "dataset_path": "hellaswag", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "hellaswag:rc::olmes"}}, "current_date": "2024-11-18 21:30:24 UTC", "num_instances": 10042}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "openbookqa:mc", "task_hash": "c29899952d034341728ff084a971203b", "task_config": {"task_name": "openbookqa:mc", "task_core": "openbookqa", "limit": 1000000, "split": "test", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:openbookqa", "dataset_path": "openbookqa", "dataset_name": "main", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "openbookqa:mc::olmes"}}, "current_date": "2024-11-18 22:09:27 UTC", "num_instances": 500}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "openbookqa", "task_hash": "85531fd2bc307ef4b3b7c8169838815e", "task_config": {"task_name": "openbookqa", "task_core": "openbookqa", "limit": 1000000, "split": "test", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_per_char", "random_subsample_seed": 1234, "context_kwargs": {"no_prefix": false}, "generation_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:openbookqa", "dataset_path": "openbookqa", "dataset_name": "main", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "openbookqa:rc::olmes"}}, "current_date": "2024-11-18 21:50:36 UTC", "num_instances": 500}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "openbookqa:mc", "task_hash": "dd163decb9d61b713173d24932ba4a8f", "task_config": {"task_name": "openbookqa:mc", "task_core": "openbookqa", "limit": 1000000, "split": "train", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_raw", "random_subsample_seed": 1234, "context_kwargs": {}, "generation_kwargs": {}, "metric_kwargs": {}, "native_id_field": "id", "fewshot_source": "OLMES:openbookqa", "dataset_path": "openbookqa", "dataset_name": "main", "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "openbookqa:mc::olmes"}}, "current_date": "2024-11-18 22:09:11 UTC", "num_instances": 4957}
Binary file not shown.
Loading

0 comments on commit 5368d86

Please sign in to comment.