From d7e96c9e6ade5701b737d3a71aa1811a6be8b663 Mon Sep 17 00:00:00 2001 From: Stephen Bach Date: Sun, 27 Mar 2022 16:02:58 -0400 Subject: [PATCH 1/3] Always use spawn. --- promptsource/app.py | 1042 ++++++++++++++++++++++--------------------- 1 file changed, 529 insertions(+), 513 deletions(-) diff --git a/promptsource/app.py b/promptsource/app.py index ef2404f43..ed1bc7076 100644 --- a/promptsource/app.py +++ b/promptsource/app.py @@ -1,4 +1,6 @@ import argparse +import functools +import multiprocessing import textwrap from multiprocessing import Manager, Pool @@ -23,6 +25,22 @@ ) +# Python 3.8 switched the default start method from fork to spawn on macOS. OS X also has +# some issues related to fork, see, e.g., https://github.com/bigscience-workshop/promptsource/issues/572 +# so we make sure we always use spawn for consistency. +multiprocessing.set_start_method("spawn", force=True) + + +def get_infos(all_infos, d_name): + """ + Wrapper for multiprocess-loading of dataset infos + + :param all_infos: multiprocess-safe dictionary + :param d_name: dataset name + """ + all_infos[d_name] = get_dataset_infos(d_name) + + # add an argument for read-only # At the moment, streamlit does not handle python script arguments gracefully. # Thus, for read-only mode, you have to type one of the below two: @@ -48,251 +66,77 @@ list_datasets = st.cache(list_datasets) -def reset_template_state(): - state.template_name = None - state.jinja = None - state.reference = None - - -# -# Loads session state -# -state = _get_state() - -# -# Initial page setup -# -st.set_page_config(page_title="Promptsource", layout="wide") -st.sidebar.markdown( - "
💻Github - Promptsource\n\n
", - unsafe_allow_html=True, -) -mode = st.sidebar.selectbox( - label="Choose a mode", - options=select_options, - index=0, - key="mode_select", -) -st.sidebar.title(f"{side_bar_title_prefix} 🌸 - {mode}") - -# -# Adds pygments styles to the page. -# -st.markdown( - "", unsafe_allow_html=True -) - -WIDTH = 140 - - -def show_jinja(t, width=WIDTH): - def replace_linebreaks(t): - """ - st.write does not handle double breaklines very well. When it encounters `\n\n`, it exit the curent
block. - Explicitely replacing all `\n` with their html equivalent to bypass this issue. - Also stripping the trailing `\n` first. - """ - return t.strip("\n").replace("\n", "
") - - wrap = textwrap.fill(t, width=width, replace_whitespace=False) - out = highlight(wrap, DjangoLexer(), HtmlFormatter()) - out = replace_linebreaks(out) - st.write(out, unsafe_allow_html=True) - - -def show_text(t, width=WIDTH, with_markdown=False): - wrap = [textwrap.fill(subt, width=width, replace_whitespace=False) for subt in t.split("\n")] - wrap = "\n".join(wrap) - if with_markdown: - st.write(wrap, unsafe_allow_html=True) - else: - st.text(wrap) - - -if mode == "Helicopter view": - st.title("High level metrics") - st.write("This will take a minute to collect.") - st.write( - "If you want to contribute, please refer to the instructions in " - + "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)." - ) - +def run_app(): # - # Loads template data + # Loads session state # - try: - template_collection = TemplateCollection() - except FileNotFoundError: - st.error( - "Unable to find the prompt folder!\n\n" - "We expect the folder to be in the working directory. " - "You might need to restart the app in the root directory of the repo." - ) - st.stop() + state = _get_state() - # - # Global metrics - # - counts = template_collection.get_templates_count() - nb_prompted_datasets = len(counts) - st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`") - nb_prompts = sum(counts.values()) - st.write(f"## Number of *prompts*: `{nb_prompts}`") + def reset_template_state(): + state.template_name = None + state.jinja = None + state.reference = None # - # Metrics per dataset/subset + # Initial page setup # - # Download dataset infos (multiprocessing download) - manager = Manager() - all_infos = manager.dict() - all_datasets = list(set([t[0] for t in template_collection.keys])) - - def get_infos(d_name): - all_infos[d_name] = get_dataset_infos(d_name) - - pool = Pool(processes=len(all_datasets)) - pool.map(get_infos, all_datasets) - pool.close() - pool.join() - - results = [] - for (dataset_name, subset_name) in template_collection.keys: - # Collect split sizes (train, validation and test) - if dataset_name not in all_infos: - infos = get_dataset_infos(dataset_name) - all_infos[dataset_name] = infos - else: - infos = all_infos[dataset_name] - if infos: - if subset_name is None: - subset_infos = infos[list(infos.keys())[0]] - else: - subset_infos = infos[subset_name] - - split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()} - else: - # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json - # so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error - # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0. 
- split_sizes = {} - - # Collect template counts, original task counts and names - dataset_templates = template_collection.get_dataset(dataset_name, subset_name) - results.append( - { - "Dataset name": dataset_name, - "Subset name": "βˆ…" if subset_name is None else subset_name, - "Train size": split_sizes["train"] if "train" in split_sizes else 0, - "Validation size": split_sizes["validation"] if "validation" in split_sizes else 0, - "Test size": split_sizes["test"] if "test" in split_sizes else 0, - "Number of prompts": len(dataset_templates), - "Number of original task prompts": sum( - [bool(t.metadata.original_task) for t in dataset_templates.templates.values()] - ), - "Prompt names": [t.name for t in dataset_templates.templates.values()], - } - ) - results_df = pd.DataFrame(results) - results_df.sort_values(["Number of prompts"], inplace=True, ascending=False) - results_df.reset_index(drop=True, inplace=True) - - nb_training_instances = results_df["Train size"].sum() - st.write(f"## Number of *training instances*: `{nb_training_instances}`") - - plot_df = results_df[["Dataset name", "Subset name", "Train size", "Number of prompts"]].copy() - plot_df["Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"] - plot_df.sort_values(["Train size"], inplace=True, ascending=False) - fig = px.bar( - plot_df, - x="Name", - y="Train size", - hover_data=["Dataset name", "Subset name", "Number of prompts"], - log_y=True, - title="Number of training instances per data(sub)set - y-axis is in logscale", - ) - fig.update_xaxes(visible=False, showticklabels=False) - st.plotly_chart(fig, use_container_width=True) - st.write( - f"- Top 3 training subsets account for `{100 * plot_df[:3]['Train size'].sum() / nb_training_instances:.2f}%` of the training instances." - ) - biggest_training_subset = plot_df.iloc[0] - st.write( - f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances" + st.set_page_config(page_title="Promptsource", layout="wide") + st.sidebar.markdown( + "
💻Github - Promptsource\n\n
", + unsafe_allow_html=True, ) - smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1] - st.write( - f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances" + mode = st.sidebar.selectbox( + label="Choose a mode", + options=select_options, + index=0, + key="mode_select", ) - - st.markdown("***") - st.write("Details per dataset") - st.table(results_df) - -else: - # Combining mode `Prompted dataset viewer` and `Sourcing` since the - # backbone of the interfaces is the same - assert mode in ["Prompted dataset viewer", "Sourcing"], ValueError( - f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`" - ) - - # - # Loads dataset information - # - - dataset_list = list_datasets() - ag_news_index = dataset_list.index("ag_news") + st.sidebar.title(f"{side_bar_title_prefix} 🌸 - {mode}") # - # Select a dataset - starts with ag_news + # Adds pygments styles to the page. # - dataset_key = st.sidebar.selectbox( - "Dataset", - dataset_list, - key="dataset_select", - index=ag_news_index, - help="Select the dataset to work on.", + st.markdown( + "", unsafe_allow_html=True ) - # - # If a particular dataset is selected, loads dataset and template information - # - if dataset_key is not None: + WIDTH = 140 - # - # Check for subconfigurations (i.e. subsets) - # - configs = get_dataset_confs(dataset_key) - conf_option = None - if len(configs) > 0: - conf_option = st.sidebar.selectbox("Subset", configs, index=0, format_func=lambda a: a.name) - - subset_name = str(conf_option.name) if conf_option else None - try: - dataset = get_dataset(dataset_key, subset_name) - except OSError as e: - st.error( - f"Some datasets are not handled automatically by `datasets` and require users to download the " - f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. " - f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. " - f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment " - f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`" - f"\n\nOriginal error:\n{str(e)}" - ) - st.stop() - - splits = list(dataset.keys()) - index = 0 - if "train" in splits: - index = splits.index("train") - split = st.sidebar.selectbox("Split", splits, key="split_select", index=index) - dataset = dataset[split] - dataset = renameDatasetColumn(dataset) + def show_jinja(t, width=WIDTH): + def replace_linebreaks(t): + """ + st.write does not handle double breaklines very well. When it encounters `\n\n`, it exit the curent
block. + Explicitly replacing all `\n` with their html equivalent to bypass this issue. + Also stripping the trailing `\n` first. + """ + return t.strip("\n").replace("\n", "
") + + wrap = textwrap.fill(t, width=width, replace_whitespace=False) + out = highlight(wrap, DjangoLexer(), HtmlFormatter()) + out = replace_linebreaks(out) + st.write(out, unsafe_allow_html=True) + + def show_text(t, width=WIDTH, with_markdown=False): + wrap = [textwrap.fill(subt, width=width, replace_whitespace=False) for subt in t.split("\n")] + wrap = "\n".join(wrap) + if with_markdown: + st.write(wrap, unsafe_allow_html=True) + else: + st.text(wrap) + + if mode == "Helicopter view": + st.title("High level metrics") + st.write("This will take a minute to collect.") + st.write( + "If you want to contribute, please refer to the instructions in " + + "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)." + ) # # Loads template data # try: - dataset_templates = DatasetTemplates(dataset_key, conf_option.name if conf_option else None) + template_collection = TemplateCollection() except FileNotFoundError: st.error( "Unable to find the prompt folder!\n\n" @@ -301,324 +145,496 @@ def get_infos(d_name): ) st.stop() - template_list = dataset_templates.all_template_names - num_templates = len(template_list) - st.sidebar.write( - "No of prompts created for " - + f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`" - + f": **{str(num_templates)}**" - ) + # + # Global metrics + # + counts = template_collection.get_templates_count() + nb_prompted_datasets = len(counts) + st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`") + nb_prompts = sum(counts.values()) + st.write(f"## Number of *prompts*: `{nb_prompts}`") - if mode == "Prompted dataset viewer": - if num_templates > 0: - template_name = st.sidebar.selectbox( - "Prompt name", - template_list, - key="template_select", - index=0, - help="Select the prompt to visualize.", - ) + # + # Metrics per dataset/subset + # + # Download dataset infos (multiprocessing download) + manager = Manager() + all_infos = manager.dict() + all_datasets = list(set([t[0] for t in template_collection.keys])) + + pool = Pool(processes=multiprocessing.cpu_count()) + pool.map(functools.partial(get_infos, all_infos), all_datasets) + pool.close() + pool.join() + + results = [] + for (dataset_name, subset_name) in template_collection.keys: + # Collect split sizes (train, validation and test) + if dataset_name not in all_infos: + infos = get_dataset_infos(dataset_name) + all_infos[dataset_name] = infos + else: + infos = all_infos[dataset_name] + if infos: + if subset_name is None: + subset_infos = infos[list(infos.keys())[0]] + else: + subset_infos = infos[subset_name] - step = 50 - example_index = st.sidebar.number_input( - f"Select the example index (Size = {len(dataset)})", - min_value=0, - max_value=len(dataset) - step, - value=0, - step=step, - key="example_index_number_input", - help="Offset = 50.", + split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()} + else: + # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json + # so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error + # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0. 
+ split_sizes = {} + + # Collect template counts, original task counts and names + dataset_templates = template_collection.get_dataset(dataset_name, subset_name) + results.append( + { + "Dataset name": dataset_name, + "Subset name": "βˆ…" if subset_name is None else subset_name, + "Train size": split_sizes["train"] if "train" in split_sizes else 0, + "Validation size": split_sizes["validation"] if "validation" in split_sizes else 0, + "Test size": split_sizes["test"] if "test" in split_sizes else 0, + "Number of prompts": len(dataset_templates), + "Number of original task prompts": sum( + [bool(t.metadata.original_task) for t in dataset_templates.templates.values()] + ), + "Prompt names": [t.name for t in dataset_templates.templates.values()], + } ) - else: # mode = Sourcing - st.sidebar.subheader("Select Example") - example_index = st.sidebar.slider("Select the example index", 0, len(dataset) - 1) - - example = dataset[example_index] - example = removeHyphen(example) + results_df = pd.DataFrame(results) + results_df.sort_values(["Number of prompts"], inplace=True, ascending=False) + results_df.reset_index(drop=True, inplace=True) + + nb_training_instances = results_df["Train size"].sum() + st.write(f"## Number of *training instances*: `{nb_training_instances}`") + + plot_df = results_df[["Dataset name", "Subset name", "Train size", "Number of prompts"]].copy() + plot_df["Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"] + plot_df.sort_values(["Train size"], inplace=True, ascending=False) + fig = px.bar( + plot_df, + x="Name", + y="Train size", + hover_data=["Dataset name", "Subset name", "Number of prompts"], + log_y=True, + title="Number of training instances per data(sub)set - y-axis is in logscale", + ) + fig.update_xaxes(visible=False, showticklabels=False) + st.plotly_chart(fig, use_container_width=True) + st.write( + f"- Top 3 training subsets account for `{100 * plot_df[:3]['Train size'].sum() / nb_training_instances:.2f}%` of the training instances." 
+ ) + biggest_training_subset = plot_df.iloc[0] + st.write( + f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances" + ) + smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1] + st.write( + f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances" + ) - st.sidebar.write(example) + st.markdown("***") + st.write("Details per dataset") + st.table(results_df) - st.sidebar.subheader("Dataset Schema") - rendered_features = render_features(dataset.features) - st.sidebar.write(rendered_features) + else: + # Combining mode `Prompted dataset viewer` and `Sourcing` since the + # backbone of the interfaces is the same + assert mode in ["Prompted dataset viewer", "Sourcing"], ValueError( + f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`" + ) # - # Display dataset information + # Loads dataset information # - st.header("Dataset: " + dataset_key + " " + (("/ " + conf_option.name) if conf_option else "")) - - # If we have a custom dataset change the source link to the hub - split_dataset_key = dataset_key.split("/") - possible_user = split_dataset_key[0] - if len(split_dataset_key) > 1 and possible_user in INCLUDED_USERS: - source_link = "https://huggingface.co/datasets/%s/blob/main/%s.py" % ( - dataset_key, - split_dataset_key[-1], - ) - else: - source_link = "https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py" % ( - dataset_key, - dataset_key, - ) - st.markdown("*Homepage*: " + dataset.info.homepage + "\n\n*Dataset*: " + source_link) + dataset_list = list_datasets() + ag_news_index = dataset_list.index("ag_news") - md = """ - %s - """ % ( - dataset.info.description.replace("\\", "") if dataset_key else "" + # + # Select a dataset - starts with ag_news + # + dataset_key = st.sidebar.selectbox( + "Dataset", + dataset_list, + key="dataset_select", + index=ag_news_index, + help="Select the dataset to work on.", ) - st.markdown(md) # - # Body of the app: display prompted examples in mode `Prompted dataset viewer` - # or text boxes to create new prompts in mode `Sourcing` + # If a particular dataset is selected, loads dataset and template information # - if mode == "Prompted dataset viewer": + if dataset_key is not None: + # - # Display template information + # Check for subconfigurations (i.e. subsets) # - if num_templates > 0: - template = dataset_templates[template_name] - st.subheader("Prompt") - st.markdown("##### Name") - st.text(template.name) - st.markdown("##### Reference") - st.text(template.reference) - st.markdown("##### Original Task? ") - st.text(template.metadata.original_task) - st.markdown("##### Choices in template? 
") - st.text(template.metadata.choices_in_prompt) - st.markdown("##### Metrics") - st.text(", ".join(template.metadata.metrics) if template.metadata.metrics else None) - st.markdown("##### Answer Choices") - if template.get_answer_choices_expr() is not None: - show_jinja(template.get_answer_choices_expr()) - else: - st.text(None) - st.markdown("##### Jinja template") - splitted_template = template.jinja.split("|||") - st.markdown("###### Input template") - show_jinja(splitted_template[0].strip()) - if len(splitted_template) > 1: - st.markdown("###### Target template") - show_jinja(splitted_template[1].strip()) - st.markdown("***") + configs = get_dataset_confs(dataset_key) + conf_option = None + if len(configs) > 0: + conf_option = st.sidebar.selectbox("Subset", configs, index=0, format_func=lambda a: a.name) + + subset_name = str(conf_option.name) if conf_option else None + try: + dataset = get_dataset(dataset_key, subset_name) + except OSError as e: + st.error( + f"Some datasets are not handled automatically by `datasets` and require users to download the " + f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. " + f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. " + f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment " + f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`" + f"\n\nOriginal error:\n{str(e)}" + ) + st.stop() + + splits = list(dataset.keys()) + index = 0 + if "train" in splits: + index = splits.index("train") + split = st.sidebar.selectbox("Split", splits, key="split_select", index=index) + dataset = dataset[split] + dataset = renameDatasetColumn(dataset) # - # Display a couple (steps) examples + # Loads template data # - for ex_idx in range(example_index, example_index + step): - if ex_idx >= len(dataset): - continue - example = dataset[ex_idx] - example = removeHyphen(example) - col1, _, col2 = st.beta_columns([12, 1, 12]) - with col1: - st.write(example) + try: + dataset_templates = DatasetTemplates(dataset_key, conf_option.name if conf_option else None) + except FileNotFoundError: + st.error( + "Unable to find the prompt folder!\n\n" + "We expect the folder to be in the working directory. " + "You might need to restart the app in the root directory of the repo." 
+ ) + st.stop() + + template_list = dataset_templates.all_template_names + num_templates = len(template_list) + st.sidebar.write( + "No of prompts created for " + + f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`" + + f": **{str(num_templates)}**" + ) + + if mode == "Prompted dataset viewer": if num_templates > 0: - with col2: - prompt = template.apply(example, highlight_variables=False) - if prompt == [""]: - st.write("βˆ…βˆ…βˆ… *Blank result*") - else: - st.write("Input") - show_text(prompt[0]) - if len(prompt) > 1: - st.write("Target") - show_text(prompt[1]) - st.markdown("***") - else: # mode = Sourcing - st.markdown("## Prompt Creator") + template_name = st.sidebar.selectbox( + "Prompt name", + template_list, + key="template_select", + index=0, + help="Select the prompt to visualize.", + ) + + step = 50 + example_index = st.sidebar.number_input( + f"Select the example index (Size = {len(dataset)})", + min_value=0, + max_value=len(dataset) - step, + value=0, + step=step, + key="example_index_number_input", + help="Offset = 50.", + ) + else: # mode = Sourcing + st.sidebar.subheader("Select Example") + example_index = st.sidebar.slider("Select the example index", 0, len(dataset) - 1) + + example = dataset[example_index] + example = removeHyphen(example) + + st.sidebar.write(example) + + st.sidebar.subheader("Dataset Schema") + rendered_features = render_features(dataset.features) + st.sidebar.write(rendered_features) # - # Create a new template or select an existing one + # Display dataset information # - col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6]) - - # current_templates_key and state.templates_key are keys for the templates object - current_templates_key = (dataset_key, conf_option.name if conf_option else None) - - # Resets state if there has been a change in templates_key - if state.templates_key != current_templates_key: - state.templates_key = current_templates_key - reset_template_state() - - with col1a, st.form("new_template_form"): - new_template_name = st.text_input( - "Create a New Prompt", - key="new_template", - value="", - help="Enter name and hit enter to create a new prompt.", + st.header("Dataset: " + dataset_key + " " + (("/ " + conf_option.name) if conf_option else "")) + + # If we have a custom dataset change the source link to the hub + split_dataset_key = dataset_key.split("/") + possible_user = split_dataset_key[0] + if len(split_dataset_key) > 1 and possible_user in INCLUDED_USERS: + source_link = "https://huggingface.co/datasets/%s/blob/main/%s.py" % ( + dataset_key, + split_dataset_key[-1], ) - new_template_submitted = st.form_submit_button("Create") - if new_template_submitted: - if new_template_name in dataset_templates.all_template_names: - st.error( - f"A prompt with the name {new_template_name} already exists " - f"for dataset {state.templates_key}." - ) - elif new_template_name == "": - st.error("Need to provide a prompt name.") - else: - template = Template(new_template_name, "", "") - dataset_templates.add_template(template) - reset_template_state() - state.template_name = new_template_name - else: - state.new_template_name = None - - with col1b, st.beta_expander("or Select Prompt", expanded=True): - template_list = dataset_templates.all_template_names - if state.template_name: - index = template_list.index(state.template_name) - else: - index = 0 - state.template_name = st.selectbox( - "", template_list, key="template_select", index=index, help="Select the prompt to work on." 
+ else: + source_link = "https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py" % ( + dataset_key, + dataset_key, ) - if st.button("Delete Prompt", key="delete_prompt"): - dataset_templates.remove_template(state.template_name) - reset_template_state() + st.markdown("*Homepage*: " + dataset.info.homepage + "\n\n*Dataset*: " + source_link) - variety_guideline = """ - :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wordings (i.e. marginally changing 2 or 3 words) is highly encouraged. - Ultimately, the hope is that exposing the model to such a diversity will have a non-trivial impact on the model's robustness to the prompt formulation. - \r**To get various prompts, you can try moving the cursor along theses axes**: - \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input. - \n- **Task description localization**: where is the task description blended with the inputs? In the beginning, in the middle, at the end? - \n- **Implicit situation or contextualization**: how explicit is the query? For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive. - """ - - col1, _, _ = st.beta_columns([18, 1, 6]) - with col1: - if state.template_name is not None: - show_text(variety_guideline, with_markdown=True) + md = """ + %s + """ % ( + dataset.info.description.replace("\\", "") if dataset_key else "" + ) + st.markdown(md) # - # Edit the created or selected template + # Body of the app: display prompted examples in mode `Prompted dataset viewer` + # or text boxes to create new prompts in mode `Sourcing` # - col1, _, col2 = st.beta_columns([18, 1, 6]) - with col1: - if state.template_name is not None: - template = dataset_templates[state.template_name] - # - # If template is selected, displays template editor - # - with st.form("edit_template_form"): - updated_template_name = st.text_input("Name", value=template.name) - state.reference = st.text_input( - "Prompt Reference", - help="Short description of the prompt and/or paper reference for the prompt.", - value=template.reference, - ) - - # Metadata - state.metadata = template.metadata - state.metadata.original_task = st.checkbox( - "Original Task?", - value=template.metadata.original_task, - help="Prompt asks model to perform the original task designed for this dataset.", - ) - state.metadata.choices_in_prompt = st.checkbox( - "Choices in Template?", - value=template.metadata.choices_in_prompt, - help="Prompt explicitly lists choices in the template for the output.", - ) - - # Metrics from here: - # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py - metrics_choices = [ - "BLEU", - "ROUGE", - "Squad", - "Trivia QA", - "Accuracy", - "Pearson Correlation", - "Spearman Correlation", - "MultiRC", - "AUC", - "COQA F1", - "Edit Distance", - ] - # Add mean reciprocal rank - metrics_choices.append("Mean Reciprocal Rank") - # Add generic other - metrics_choices.append("Other") - # Sort alphabetically - metrics_choices = sorted(metrics_choices) - state.metadata.metrics = st.multiselect( - "Metrics", - metrics_choices, - default=template.metadata.metrics, - help="Select all metrics that are commonly used (or should " - "be used if a new task) to evaluate this prompt.", - ) - - # Answer choices - if template.get_answer_choices_expr() is not None: - 
answer_choices = template.get_answer_choices_expr() + if mode == "Prompted dataset viewer": + # + # Display template information + # + if num_templates > 0: + template = dataset_templates[template_name] + st.subheader("Prompt") + st.markdown("##### Name") + st.text(template.name) + st.markdown("##### Reference") + st.text(template.reference) + st.markdown("##### Original Task? ") + st.text(template.metadata.original_task) + st.markdown("##### Choices in template? ") + st.text(template.metadata.choices_in_prompt) + st.markdown("##### Metrics") + st.text(", ".join(template.metadata.metrics) if template.metadata.metrics else None) + st.markdown("##### Answer Choices") + if template.get_answer_choices_expr() is not None: + show_jinja(template.get_answer_choices_expr()) + else: + st.text(None) + st.markdown("##### Jinja template") + splitted_template = template.jinja.split("|||") + st.markdown("###### Input template") + show_jinja(splitted_template[0].strip()) + if len(splitted_template) > 1: + st.markdown("###### Target template") + show_jinja(splitted_template[1].strip()) + st.markdown("***") + + # + # Display a couple (steps) examples + # + for ex_idx in range(example_index, example_index + step): + if ex_idx >= len(dataset): + continue + example = dataset[ex_idx] + example = removeHyphen(example) + col1, _, col2 = st.beta_columns([12, 1, 12]) + with col1: + st.write(example) + if num_templates > 0: + with col2: + prompt = template.apply(example, highlight_variables=False) + if prompt == [""]: + st.write("βˆ…βˆ…βˆ… *Blank result*") + else: + st.write("Input") + show_text(prompt[0]) + if len(prompt) > 1: + st.write("Target") + show_text(prompt[1]) + st.markdown("***") + else: # mode = Sourcing + st.markdown("## Prompt Creator") + + # + # Create a new template or select an existing one + # + col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6]) + + # current_templates_key and state.templates_key are keys for the templates object + current_templates_key = (dataset_key, conf_option.name if conf_option else None) + + # Resets state if there has been a change in templates_key + if state.templates_key != current_templates_key: + state.templates_key = current_templates_key + reset_template_state() + + with col1a, st.form("new_template_form"): + new_template_name = st.text_input( + "Create a New Prompt", + key="new_template", + value="", + help="Enter name and hit enter to create a new prompt.", + ) + new_template_submitted = st.form_submit_button("Create") + if new_template_submitted: + if new_template_name in dataset_templates.all_template_names: + st.error( + f"A prompt with the name {new_template_name} already exists " + f"for dataset {state.templates_key}." + ) + elif new_template_name == "": + st.error("Need to provide a prompt name.") else: - answer_choices = "" - state.answer_choices = st.text_input( - "Answer Choices", - value=answer_choices, - help="A Jinja expression for computing answer choices. " - "Separate choices with a triple bar (|||).", - ) - - # Jinja - state.jinja = st.text_area("Template", height=40, value=template.jinja) - - # Submit form - if st.form_submit_button("Save"): - if ( - updated_template_name in dataset_templates.all_template_names - and updated_template_name != state.template_name - ): - st.error( - f"A prompt with the name {updated_template_name} already exists " - f"for dataset {state.templates_key}." 
- ) - elif updated_template_name == "": - st.error("Need to provide a prompt name.") + template = Template(new_template_name, "", "") + dataset_templates.add_template(template) + reset_template_state() + state.template_name = new_template_name + else: + state.new_template_name = None + + with col1b, st.beta_expander("or Select Prompt", expanded=True): + template_list = dataset_templates.all_template_names + if state.template_name: + index = template_list.index(state.template_name) + else: + index = 0 + state.template_name = st.selectbox( + "", template_list, key="template_select", index=index, help="Select the prompt to work on." + ) + + if st.button("Delete Prompt", key="delete_prompt"): + dataset_templates.remove_template(state.template_name) + reset_template_state() + + variety_guideline = """ + :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wordings (i.e. marginally changing 2 or 3 words) is highly encouraged. + Ultimately, the hope is that exposing the model to such a diversity will have a non-trivial impact on the model's robustness to the prompt formulation. + \r**To get various prompts, you can try moving the cursor along theses axes**: + \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input. + \n- **Task description localization**: where is the task description blended with the inputs? In the beginning, in the middle, at the end? + \n- **Implicit situation or contextualization**: how explicit is the query? For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive. + """ + + col1, _, _ = st.beta_columns([18, 1, 6]) + with col1: + if state.template_name is not None: + show_text(variety_guideline, with_markdown=True) + + # + # Edit the created or selected template + # + col1, _, col2 = st.beta_columns([18, 1, 6]) + with col1: + if state.template_name is not None: + template = dataset_templates[state.template_name] + # + # If template is selected, displays template editor + # + with st.form("edit_template_form"): + updated_template_name = st.text_input("Name", value=template.name) + state.reference = st.text_input( + "Prompt Reference", + help="Short description of the prompt and/or paper reference for the prompt.", + value=template.reference, + ) + + # Metadata + state.metadata = template.metadata + state.metadata.original_task = st.checkbox( + "Original Task?", + value=template.metadata.original_task, + help="Prompt asks model to perform the original task designed for this dataset.", + ) + state.metadata.choices_in_prompt = st.checkbox( + "Choices in Template?", + value=template.metadata.choices_in_prompt, + help="Prompt explicitly lists choices in the template for the output.", + ) + + # Metrics from here: + # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py + metrics_choices = [ + "BLEU", + "ROUGE", + "Squad", + "Trivia QA", + "Accuracy", + "Pearson Correlation", + "Spearman Correlation", + "MultiRC", + "AUC", + "COQA F1", + "Edit Distance", + ] + # Add mean reciprocal rank + metrics_choices.append("Mean Reciprocal Rank") + # Add generic other + metrics_choices.append("Other") + # Sort alphabetically + metrics_choices = sorted(metrics_choices) + state.metadata.metrics = st.multiselect( + "Metrics", + metrics_choices, + default=template.metadata.metrics, + help="Select all metrics that 
are commonly used (or should " + "be used if a new task) to evaluate this prompt.", + ) + + # Answer choices + if template.get_answer_choices_expr() is not None: + answer_choices = template.get_answer_choices_expr() else: - # Parses state.answer_choices - if state.answer_choices == "": - updated_answer_choices = None + answer_choices = "" + state.answer_choices = st.text_input( + "Answer Choices", + value=answer_choices, + help="A Jinja expression for computing answer choices. " + "Separate choices with a triple bar (|||).", + ) + + # Jinja + state.jinja = st.text_area("Template", height=40, value=template.jinja) + + # Submit form + if st.form_submit_button("Save"): + if ( + updated_template_name in dataset_templates.all_template_names + and updated_template_name != state.template_name + ): + st.error( + f"A prompt with the name {updated_template_name} already exists " + f"for dataset {state.templates_key}." + ) + elif updated_template_name == "": + st.error("Need to provide a prompt name.") else: - updated_answer_choices = state.answer_choices - - dataset_templates.update_template( - state.template_name, - updated_template_name, - state.jinja, - state.reference, - state.metadata, - updated_answer_choices, - ) - # Update the state as well - state.template_name = updated_template_name - # - # Displays template output on current example if a template is selected - # (in second column) - # - with col2: - if state.template_name is not None: - st.empty() - template = dataset_templates[state.template_name] - prompt = template.apply(example) - if prompt == [""]: - st.write("βˆ…βˆ…βˆ… *Blank result*") - else: - st.write("Input") - show_text(prompt[0], width=40) - if len(prompt) > 1: - st.write("Target") - show_text(prompt[1], width=40) + # Parses state.answer_choices + if state.answer_choices == "": + updated_answer_choices = None + else: + updated_answer_choices = state.answer_choices + + dataset_templates.update_template( + state.template_name, + updated_template_name, + state.jinja, + state.reference, + state.metadata, + updated_answer_choices, + ) + # Update the state as well + state.template_name = updated_template_name + # + # Displays template output on current example if a template is selected + # (in second column) + # + with col2: + if state.template_name is not None: + st.empty() + template = dataset_templates[state.template_name] + prompt = template.apply(example) + if prompt == [""]: + st.write("βˆ…βˆ…βˆ… *Blank result*") + else: + st.write("Input") + show_text(prompt[0], width=40) + if len(prompt) > 1: + st.write("Target") + show_text(prompt[1], width=40) -# -# Must sync state at end -# -state.sync() + # + # Must sync state at end + # + state.sync() + + +if __name__ == "__main__": + run_app() From 95b4b8366764004913c33388bf604748eb0fa60d Mon Sep 17 00:00:00 2001 From: Stephen Bach Date: Sun, 27 Mar 2022 16:15:38 -0400 Subject: [PATCH 2/3] Relax python version requirements. 
--- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d922d983f..bc8e0a9dd 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ url='https://github.com/bigscience-workshop/promptsource.git', author='BigScience - Prompt Engineering Working Group', author_email='sbach@cs.brown.edu,victor@huggingface.co', - python_requires='>=3.7, <3.8', + python_requires='>=3.7', install_requires=requirements, classifiers=[ 'Development Status :: 2 - Pre-Alpha', @@ -39,6 +39,8 @@ 'Natural Language :: English', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', ], description='An Integrated Development Environment and Repository for Natural Language Prompts.', packages=find_packages(), From 475206a2798fd2b27e559f82044c8eef62bbe739 Mon Sep 17 00:00:00 2001 From: Stephen Bach Date: Mon, 18 Apr 2022 10:42:02 -0400 Subject: [PATCH 3/3] Update setup.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: 姜 天戩 Mike Tian-Jian Jiang --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bc8e0a9dd..621694f7a 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ url='https://github.com/bigscience-workshop/promptsource.git', author='BigScience - Prompt Engineering Working Group', author_email='sbach@cs.brown.edu,victor@huggingface.co', - python_requires='>=3.7', + python_requires='>=3.7,<3.10', install_requires=requirements, classifiers=[ 'Development Status :: 2 - Pre-Alpha',