diff --git a/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-google.yaml b/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-google.yaml index d3b02de8d..74b66a557 100644 --- a/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-google.yaml +++ b/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-google.yaml @@ -139,94 +139,76 @@ evaluators: ########################## # Downstream evaluations # ########################## - - label: piqa + - label: mmlu_stem_mc_5shot type: downstream - - label: hellaswag + - label: mmlu_humanities_mc_5shot type: downstream - - label: winogrande + - label: mmlu_social_sciences_mc_5shot type: downstream - - label: openbook_qa + - label: mmlu_other_mc_5shot type: downstream - - label: boolq + - label: arc_challenge_mc_5shot type: downstream - - label: sciq + - label: arc_challenge_mc_5shot_bpb type: downstream - - label: arc_easy + - label: arc_easy_mc_5shot type: downstream - - label: arc_challenge + - label: arc_easy_mc_5shot_bpb type: downstream - - label: copa + - label: boolq_mc_5shot type: downstream - #- label: rte - # type: downstream - - #- label: commitment_bank - # type: downstream - - #- label: sst2 - # type: downstream - - - label: commonsense_qa + - label: boolq_mc_5shot_bpb type: downstream - - label: social_iqa + - label: csqa_mc_5shot type: downstream - - label: mmlu_stem_var + - label: csqa_mc_5shot_bpb type: downstream - - label: mmlu_humanities_var + - label: hellaswag_mc_5shot type: downstream - - label: mmlu_social_sciences_var + - label: hellaswag_mc_5shot_bpb type: downstream - - label: mmlu_other_var + - label: openbookqa_mc_5shot type: downstream - - label: mmlu_stem_mc_5shot + - label: openbookqa_mc_5shot_bpb type: downstream - - label: mmlu_humanities_mc_5shot + - label: piqa_mc_5shot type: downstream - - label: mmlu_social_sciences_mc_5shot + - label: piqa_mc_5shot_bpb type: downstream - - label: mmlu_other_mc_5shot + - label: socialiqa_mc_5shot type: downstream - - label: mmlu_stem_mc_5shot_test + - label: socialiqa_mc_5shot_bpb type: downstream - - label: mmlu_humanities_mc_5shot_test + - label: winogrande_mc_5shot type: downstream - - label: mmlu_social_sciences_mc_5shot_test - type: downstream - - - label: mmlu_other_mc_5shot_test + - label: winogrande_mc_5shot_bpb type: downstream - label: basic_arithmetic type: downstream - - label: trivia_qa_wiki_ppl - type: downstream - - - label: natural_qs_open_ppl - type: downstream - - - label: arc_easy_ppl + - label: hellaswag type: downstream data: diff --git a/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-se-flan-google.yaml b/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-se-flan-google.yaml index 949a17760..2c07dd3b1 100644 --- a/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-se-flan-google.yaml +++ b/configs/annealing/peteish13-anneal-from-476848-300B-moremath-dclm07-fw2-se-flan-google.yaml @@ -139,94 +139,76 @@ evaluators: ########################## # Downstream evaluations # ########################## - - label: piqa + - label: mmlu_stem_mc_5shot type: downstream - - label: hellaswag + - label: mmlu_humanities_mc_5shot type: downstream - - label: winogrande + - label: mmlu_social_sciences_mc_5shot type: downstream - - label: openbook_qa + - label: mmlu_other_mc_5shot type: downstream - - label: boolq + - label: arc_challenge_mc_5shot type: downstream - - label: sciq + - label: arc_challenge_mc_5shot_bpb type: downstream - - label: arc_easy + - label: arc_easy_mc_5shot type: downstream - - label: arc_challenge + - label: arc_easy_mc_5shot_bpb type: downstream - - label: copa + - label: boolq_mc_5shot type: downstream - #- label: rte - # type: downstream - - #- label: commitment_bank - # type: downstream - - #- label: sst2 - # type: downstream - - - label: commonsense_qa + - label: boolq_mc_5shot_bpb type: downstream - - label: social_iqa + - label: csqa_mc_5shot type: downstream - - label: mmlu_stem_var + - label: csqa_mc_5shot_bpb type: downstream - - label: mmlu_humanities_var + - label: hellaswag_mc_5shot type: downstream - - label: mmlu_social_sciences_var + - label: hellaswag_mc_5shot_bpb type: downstream - - label: mmlu_other_var + - label: openbookqa_mc_5shot type: downstream - - label: mmlu_stem_mc_5shot + - label: openbookqa_mc_5shot_bpb type: downstream - - label: mmlu_humanities_mc_5shot + - label: piqa_mc_5shot type: downstream - - label: mmlu_social_sciences_mc_5shot + - label: piqa_mc_5shot_bpb type: downstream - - label: mmlu_other_mc_5shot + - label: socialiqa_mc_5shot type: downstream - - label: mmlu_stem_mc_5shot_test + - label: socialiqa_mc_5shot_bpb type: downstream - - label: mmlu_humanities_mc_5shot_test + - label: winogrande_mc_5shot type: downstream - - label: mmlu_social_sciences_mc_5shot_test - type: downstream - - - label: mmlu_other_mc_5shot_test + - label: winogrande_mc_5shot_bpb type: downstream - label: basic_arithmetic type: downstream - - label: trivia_qa_wiki_ppl - type: downstream - - - label: natural_qs_open_ppl - type: downstream - - - label: arc_easy_ppl + - label: hellaswag type: downstream data: