Skip to content

Commit

Permalink
Better evals for the 13B anneals
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkgr committed Oct 30, 2024
1 parent 2246140 commit 43c8480
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,94 +139,76 @@ evaluators:
##########################
# Downstream evaluations #
##########################
- label: piqa
- label: mmlu_stem_mc_5shot
type: downstream

- label: hellaswag
- label: mmlu_humanities_mc_5shot
type: downstream

- label: winogrande
- label: mmlu_social_sciences_mc_5shot
type: downstream

- label: openbook_qa
- label: mmlu_other_mc_5shot
type: downstream

- label: boolq
- label: arc_challenge_mc_5shot
type: downstream

- label: sciq
- label: arc_challenge_mc_5shot_bpb
type: downstream

- label: arc_easy
- label: arc_easy_mc_5shot
type: downstream

- label: arc_challenge
- label: arc_easy_mc_5shot_bpb
type: downstream

- label: copa
- label: boolq_mc_5shot
type: downstream

#- label: rte
# type: downstream

#- label: commitment_bank
# type: downstream

#- label: sst2
# type: downstream

- label: commonsense_qa
- label: boolq_mc_5shot_bpb
type: downstream

- label: social_iqa
- label: csqa_mc_5shot
type: downstream

- label: mmlu_stem_var
- label: csqa_mc_5shot_bpb
type: downstream

- label: mmlu_humanities_var
- label: hellaswag_mc_5shot
type: downstream

- label: mmlu_social_sciences_var
- label: hellaswag_mc_5shot_bpb
type: downstream

- label: mmlu_other_var
- label: openbookqa_mc_5shot
type: downstream

- label: mmlu_stem_mc_5shot
- label: openbookqa_mc_5shot_bpb
type: downstream

- label: mmlu_humanities_mc_5shot
- label: piqa_mc_5shot
type: downstream

- label: mmlu_social_sciences_mc_5shot
- label: piqa_mc_5shot_bpb
type: downstream

- label: mmlu_other_mc_5shot
- label: socialiqa_mc_5shot
type: downstream

- label: mmlu_stem_mc_5shot_test
- label: socialiqa_mc_5shot_bpb
type: downstream

- label: mmlu_humanities_mc_5shot_test
- label: winogrande_mc_5shot
type: downstream

- label: mmlu_social_sciences_mc_5shot_test
type: downstream

- label: mmlu_other_mc_5shot_test
- label: winogrande_mc_5shot_bpb
type: downstream

- label: basic_arithmetic
type: downstream

- label: trivia_qa_wiki_ppl
type: downstream

- label: natural_qs_open_ppl
type: downstream

- label: arc_easy_ppl
- label: hellaswag
type: downstream

data:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,94 +139,76 @@ evaluators:
##########################
# Downstream evaluations #
##########################
- label: piqa
- label: mmlu_stem_mc_5shot
type: downstream

- label: hellaswag
- label: mmlu_humanities_mc_5shot
type: downstream

- label: winogrande
- label: mmlu_social_sciences_mc_5shot
type: downstream

- label: openbook_qa
- label: mmlu_other_mc_5shot
type: downstream

- label: boolq
- label: arc_challenge_mc_5shot
type: downstream

- label: sciq
- label: arc_challenge_mc_5shot_bpb
type: downstream

- label: arc_easy
- label: arc_easy_mc_5shot
type: downstream

- label: arc_challenge
- label: arc_easy_mc_5shot_bpb
type: downstream

- label: copa
- label: boolq_mc_5shot
type: downstream

#- label: rte
# type: downstream

#- label: commitment_bank
# type: downstream

#- label: sst2
# type: downstream

- label: commonsense_qa
- label: boolq_mc_5shot_bpb
type: downstream

- label: social_iqa
- label: csqa_mc_5shot
type: downstream

- label: mmlu_stem_var
- label: csqa_mc_5shot_bpb
type: downstream

- label: mmlu_humanities_var
- label: hellaswag_mc_5shot
type: downstream

- label: mmlu_social_sciences_var
- label: hellaswag_mc_5shot_bpb
type: downstream

- label: mmlu_other_var
- label: openbookqa_mc_5shot
type: downstream

- label: mmlu_stem_mc_5shot
- label: openbookqa_mc_5shot_bpb
type: downstream

- label: mmlu_humanities_mc_5shot
- label: piqa_mc_5shot
type: downstream

- label: mmlu_social_sciences_mc_5shot
- label: piqa_mc_5shot_bpb
type: downstream

- label: mmlu_other_mc_5shot
- label: socialiqa_mc_5shot
type: downstream

- label: mmlu_stem_mc_5shot_test
- label: socialiqa_mc_5shot_bpb
type: downstream

- label: mmlu_humanities_mc_5shot_test
- label: winogrande_mc_5shot
type: downstream

- label: mmlu_social_sciences_mc_5shot_test
type: downstream

- label: mmlu_other_mc_5shot_test
- label: winogrande_mc_5shot_bpb
type: downstream

- label: basic_arithmetic
type: downstream

- label: trivia_qa_wiki_ppl
type: downstream

- label: natural_qs_open_ppl
type: downstream

- label: arc_easy_ppl
- label: hellaswag
type: downstream

data:
Expand Down

0 comments on commit 43c8480

Please sign in to comment.