Skip to content

Commit

Permalink
Remove priority filter (#517)
Browse files Browse the repository at this point in the history
* templates for tweets_hate_speech

* Update templates.yaml

* Create templates.yaml

* deleting tweets template

* removing priority filter

* quality changes

* Update utils.py

Co-authored-by: Shanya Sharma - s0s0cr3 <[email protected]>
  • Loading branch information
shanyas10 and Shanya Sharma - s0s0cr3 authored Oct 12, 2021
1 parent 420e8ed commit 8a518eb
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 283 deletions.
27 changes: 0 additions & 27 deletions promptsource/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,27 +216,9 @@ def get_infos(d_name):
#
# Loads dataset information
#
if mode == "Prompted dataset viewer":
priority_filter = False
priority_max_templates = None
else: # mode = Sourcing
priority_filter = st.sidebar.checkbox(
"Filter Priority Datasets", help="This filter surfaces datasets with less than X prompts."
)
if priority_filter:
priority_max_templates = st.sidebar.number_input(
"Max no of templates per dataset", min_value=1, max_value=50, value=2, step=1
)
else:
# Clear working priority dataset retained in the
# priority list with more than priority_max_templates
state.working_priority_ds = None
priority_max_templates = None

dataset_list = list_datasets(
template_collection,
priority_filter,
priority_max_templates,
state,
)

Expand All @@ -251,12 +233,6 @@ def get_infos(d_name):
help="Select the dataset to work on.",
)

if mode == "Sourcing":
# On dataset change, clear working priority dataset
# retained in the priority list with more than priority_max_templates
if dataset_key != state.working_priority_ds:
state.working_priority_ds = None

#
# If a particular dataset is selected, loads dataset and template information
#
Expand Down Expand Up @@ -447,9 +423,6 @@ def get_infos(d_name):
dataset_templates.add_template(template)
reset_template_state()
state.template_name = new_template_name
# Keep the current working dataset in priority list
if priority_filter:
state.working_priority_ds = dataset_key
else:
state.new_template_name = None

Expand Down
258 changes: 2 additions & 256 deletions promptsource/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,262 +131,8 @@ def filter_english_datasets():
return sorted(english_datasets)


def list_datasets(template_collection, _priority_filter, _priority_max_templates, _state):
def list_datasets(template_collection, _state):
"""Get all the datasets to work with."""
dataset_list = filter_english_datasets()
count_dict = template_collection.get_templates_count()
if _priority_filter:
dataset_list = list(
set(dataset_list)
- set(
list(
d
for d in count_dict
if count_dict[d] > _priority_max_templates and d != _state.working_priority_ds
)
)
)
dataset_list.sort()
else:
dataset_list.sort(key=lambda x: DATASET_ORDER.get(x, 1000))
dataset_list.sort(key=lambda x: x.lower())
return dataset_list


DATASET_ORDER = dict(
[
("glue", 0),
("squad", 1),
("bookcorpusopen", 2),
("wikipedia", 3),
("wikitext", 4),
("imdb", 5),
("super_glue", 6),
("cnn_dailymail", 7),
("openwebtext", 8),
("common_voice", 9),
("xsum", 10),
("wmt16", 11),
("conll2003", 12),
("ag_news", 13),
("universal_dependencies", 14),
("wiki_qa", 15),
("bookcorpus", 16),
("wiki40b", 17),
("wiki_dpr", 18),
("xnli", 19),
("squad_kor_v1", 20),
("emotion", 21),
("wikiann", 22),
("amazon_us_reviews", 23),
("squad_v2", 24),
("amazon_reviews_multi", 25),
("librispeech_asr", 26),
("blimp", 27),
("scitail", 28),
("anli", 29),
("samsum", 30),
("lambada", 31),
("multi_nli", 32),
("daily_dialog", 33),
("snli", 34),
("opus_euconst", 35),
("rotten_tomatoes", 36),
("scientific_papers", 37),
("trec", 38),
("reddit_tifu", 39),
("ai2_arc", 40),
("patrickvonplaten", 41),
("gigaword", 42),
("swag", 43),
("timit_asr", 44),
("oscar", 45),
("tweet_eval", 46),
("newsgroup", 47),
("billsum", 48),
("gem", 49),
("blended_skill_talk", 50),
("eli5", 51),
("ade_corpus_v2", 52),
("race", 53),
("wikihow", 54),
("piqa", 55),
("xtreme", 56),
("commonsense_qa", 57),
("wiki_snippets", 58),
("mlsum", 59),
("multi_news", 60),
("wmt14", 61),
("asnq", 62),
("toriving", 63),
("crime_and_punish", 64),
("few_rel", 65),
("code_search_net", 66),
("universal_morphologies", 67),
("ms_marco", 68),
("trivia_qa", 69),
("lama", 70),
("newsroom", 71),
("hellaswag", 72),
("adversarial_qa", 73),
("hatexplain", 74),
("hans", 75),
("kilt_tasks", 76),
("xglue", 77),
("amazon_polarity", 78),
("meta_woz", 79),
("opus_books", 80),
("wmt18", 81),
("covid_qa_deepset", 82),
("emotion\\dataset_infos.json", 83),
("wmt19", 84),
("discofuse", 85),
("mrqa", 86),
("winogrande", 87),
("go_emotions", 88),
("tydiqa", 89),
("yelp_polarity", 90),
("banking77", 91),
("math_dataset", 92),
("pubmed_qa", 93),
("opus_ubuntu", 94),
("acronym_identification", 95),
("math_qa", 96),
("babi_qa", 97),
("dbpedia_14", 98),
("ted_multi", 99),
("allocine", 100),
("hotpot_qa", 101),
("cc_news", 102),
("conll2002", 103),
("cuad", 104),
("mc_taco", 105),
("silicone", 106),
("discovery", 107),
("mt_eng_vietnamese", 108),
("quac", 109),
("conllpp", 110),
("ubuntu_dialogs_corpus", 111),
("esnli", 112),
("doc2dial", 113),
("squad_kor_v2", 114),
("opus_gnome", 115),
("german_legal_entity_recognition", 116),
("openbookqa", 117),
("tapaco", 118),
("xquad_r", 119),
("imdb\\dataset_infos.json", 120),
("opus_wikipedia", 121),
("amr", 122),
("wnut_17", 123),
("empathetic_dialogues", 124),
("cbt", 125),
("opus_rf", 126),
("narrativeqa", 127),
("mnist", 128),
("sick", 129),
("swda", 130),
("aeslc", 131),
("art", 132),
("coqa", 133),
("opus100", 134),
("sst", 135),
("big_patent", 136),
("germeval_14", 137),
("liar", 138),
("un_pc", 139),
("alt", 140),
("circa", 141),
("scan", 142),
("wikisql", 143),
("reddit", 144),
("wino_bias", 145),
("financial_phrasebank", 146),
("social_i_qa", 147),
("newsqa", 148),
("cosmos_qa", 149),
("classla", 150),
("scicite", 151),
("codah", 152),
("ehealth_kd", 153),
("wikicorpus", 154),
("ccaligned_multilingual", 155),
("cos_e", 156),
("thaisum", 157),
("cfq", 158),
("yahoo_answers_topics", 159),
("wmt", 160),
("natural_questions", 161),
("cc100", 162),
("paws", 163),
("boolq", 164),
("break_data", 165),
("pragmeval", 166),
("arabic_speech_corpus", 167),
("text\\dataset_infos.json", 168),
("md_gender_bias", 169),
("mlqa", 170),
("arabic_billion_words", 171),
("dialog_re", 172),
("tweets_hate_speech_detection", 173),
("ecthr_cases", 174),
("json\\dataset_infos.json", 175),
("conv_ai_2", 176),
("dream", 177),
("kor_ner", 178),
("youtube_caption_corrections", 179),
("spider", 180),
("air_dialogue", 181),
("arxiv_dataset", 182),
("data", 183),
("quora", 184),
("docred", 185),
("guardian_authorship", 186),
("quartz", 187),
("yelp_review_full", 188),
("xquad", 189),
("ted_talks_iwslt", 190),
("orange_sum", 191),
("indonlu", 192),
("tweet_qa", 193),
("multi_woz_v22", 194),
("s2orc", 195),
("clarin-pl", 196),
("cord19", 197),
("emo", 198),
("indic_glue", 199),
("ethos", 200),
("persiannlp", 201),
("totto", 202),
("wongnai_reviews", 203),
("bavard", 204),
("europa_ecdc_tm", 205),
("google_wellformed_query", 206),
("paws-x", 207),
("emea", 208),
("fever", 209),
("asset", 210),
("kilt_wikipedia", 211),
("clinc_oos", 212),
("conv_ai_3", 213),
("ncbi_disease", 214),
("sentiment140", 215),
("quarel", 216),
("txt", 217),
("ajgt_twitter_ar", 218),
("ambig_qa", 219),
("ptb_text_only", 220),
("stsb_multi_mt", 221),
("web_questions", 222),
("winograd_wsc", 223),
("eurlex", 224),
("muchocine", 225),
("app_reviews", 226),
("aqua_rat", 227),
("bible_para", 228),
("wiki_auto", 229),
("cifar10", 230),
("eli5\\dataset_infos.json", 231),
("quail", 232),
("hyperpartisan_news_detection", 233),
]
)

0 comments on commit 8a518eb

Please sign in to comment.