From a62351deccca5340e85d9b6453031a052a02c8cc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 18:00:18 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ice/agent.py | 3 +-- ice/cache.py | 1 + ice/evaluation/evaluate_recipe_result.py | 20 ++++++++++--------- ice/evaluation/evaluation_report.py | 18 ++++++++--------- ice/evaluation/summarize_experiment_evals.py | 16 +++++++++------ ice/metrics/gold_paragraphs.py | 1 + ice/metrics/gold_standards.py | 12 ++++------- ice/paper.py | 6 +++--- ice/recipes/blinding_dynamic.py | 7 ++++--- ice/recipes/consort_flow/baselines.py | 8 +++++--- ice/recipes/consort_flow/golds.py | 8 +++++--- .../recipes/best_passages.py | 14 +++++++------ .../recipes/name_experiments.py | 16 ++++++++------- .../meta/eval_paper_qa/common_baselines.py | 8 +++++--- ice/recipes/meta/eval_paper_qa/types.py | 9 +++------ ice/recipes/placebo_dialogs.py | 8 +++++--- ice/recipes/placebo_keyword_baseline.py | 8 +++++--- .../primer/answer_by_dispatch/types.py | 3 +-- ice/recipes/primer/sequential_action.py | 9 +++------ .../program_search/nodes/decontext/prompts.py | 16 +++++++++------ .../program_search/nodes/select/prompts.py | 8 +++++--- .../program_search/nodes/select/select.py | 8 +++++--- ice/recipes/program_search/types.py | 3 +-- .../program_search/utils/find_examples.py | 6 +++--- ice/trace.py | 3 +-- 25 files changed, 118 insertions(+), 101 deletions(-) diff --git a/ice/agent.py b/ice/agent.py index ae3c6b6e..7ee5a951 100644 --- a/ice/agent.py +++ b/ice/agent.py @@ -21,8 +21,7 @@ except ImportError: class Tfew(Agent): - def __init__(self, *args, **kwargs): - ... + def __init__(self, *args, **kwargs): ... MACHINE_AGENTS = { diff --git a/ice/cache.py b/ice/cache.py index 4a72ee6f..60936cc5 100644 --- a/ice/cache.py +++ b/ice/cache.py @@ -1,6 +1,7 @@ """ Decorator for caching function results to disk """ + import asyncio import functools import inspect diff --git a/ice/evaluation/evaluate_recipe_result.py b/ice/evaluation/evaluate_recipe_result.py index 824ea578..6464dea5 100644 --- a/ice/evaluation/evaluate_recipe_result.py +++ b/ice/evaluation/evaluate_recipe_result.py @@ -138,9 +138,7 @@ def __str__(self) -> str: correctness = ( "Correct" if self.is_correct == True - else "Incorrect" - if self.is_correct == False - else "Not evaluated" + else "Incorrect" if self.is_correct == False else "Not evaluated" ) return f"""{correctness}. - Predicted: {self.predicted} @@ -196,13 +194,17 @@ def evaluated_classifications(self) -> list[EvaluatedClassification]: for i in range(0, max(len(recipe_classifications), len(gold_classifications))): evaluated_classification = EvaluatedClassification( - predicted=recipe_classifications[i] - if i < len(recipe_classifications) - else None, + predicted=( + recipe_classifications[i] + if i < len(recipe_classifications) + else None + ), gold=gold_classifications[i] if i < len(gold_classifications) else None, - classification_eq=self.classification_eq[i] - if i < len(self.classification_eq) - else None, + classification_eq=( + self.classification_eq[i] + if i < len(self.classification_eq) + else None + ), ) evaluated_classifications.append(evaluated_classification) diff --git a/ice/evaluation/evaluation_report.py b/ice/evaluation/evaluation_report.py index c41d1791..34727bae 100644 --- a/ice/evaluation/evaluation_report.py +++ b/ice/evaluation/evaluation_report.py @@ -382,9 +382,9 @@ def make_dashboard_row_df(self): classification_summary.proportion_correct ) - row[ - f"Classification {i+1} # evaluated" - ] = classification_summary.num_evaluated + row[f"Classification {i+1} # evaluated"] = ( + classification_summary.num_evaluated + ) df = pd.DataFrame([row]) df.to_csv( @@ -408,9 +408,9 @@ def make_experiments_evaluation_df(self): "ice_commit": latest_commit_hash(), "document_id": result.document_id, "split": result.gold_standard.split if result.gold_standard else None, - "experiment": result.gold_standard.experiment - if result.gold_standard - else None, + "experiment": ( + result.gold_standard.experiment if result.gold_standard else None + ), "total_gs_quotes": len( result.evaluated_excerpts.gold_standards_in_excerpts_results ), @@ -420,9 +420,9 @@ def make_experiments_evaluation_df(self): "excerpts": result.evaluated_excerpts.excerpts, "gs_quotes": result.evaluated_excerpts.gold_standards_str(), "answer": result.answer, - "gs_answer": result.gold_standard.answer - if result.gold_standard - else None, + "gs_answer": ( + result.gold_standard.answer if result.gold_standard else None + ), "answer_rating": result.answer_rating, "failure_modes": result.failure_modes, } diff --git a/ice/evaluation/summarize_experiment_evals.py b/ice/evaluation/summarize_experiment_evals.py index 7079baa1..11683d78 100644 --- a/ice/evaluation/summarize_experiment_evals.py +++ b/ice/evaluation/summarize_experiment_evals.py @@ -35,13 +35,17 @@ async def summarize_experiment_evals(results_file: str): row.get("classification_1"), row.get("classification_2"), ], - answer_rating=None - if pd.isna(row.get("answer_rating")) - else int(row.get("answer_rating")), + answer_rating=( + None + if pd.isna(row.get("answer_rating")) + else int(row.get("answer_rating")) + ), elicit_commit=row.get("elicit_commit"), - failure_modes=None - if pd.isna(row.get("failure_modes")) - else row.failure_modes.split(","), + failure_modes=( + None + if pd.isna(row.get("failure_modes")) + else row.failure_modes.split(",") + ), ) for _, row in recipe_df.iterrows() ] diff --git a/ice/metrics/gold_paragraphs.py b/ice/metrics/gold_paragraphs.py index 9e5d85e6..792701d7 100644 --- a/ice/metrics/gold_paragraphs.py +++ b/ice/metrics/gold_paragraphs.py @@ -1,6 +1,7 @@ """ Make a dataframe that contains the paragraphs that contain the gold standard quotes. """ + import asyncio from pathlib import Path from typing import Optional diff --git a/ice/metrics/gold_standards.py b/ice/metrics/gold_standards.py index 7c0e43d6..96e8ee89 100644 --- a/ice/metrics/gold_standards.py +++ b/ice/metrics/gold_standards.py @@ -184,8 +184,7 @@ def get_gold_standards( question_short_name: Optional[str] = None, experiment: Optional[str] = None, model_type: None = None, -) -> list[GoldStandard[Any]]: - ... +) -> list[GoldStandard[Any]]: ... @overload @@ -195,8 +194,7 @@ def get_gold_standards( document_id: Optional[str] = None, question_short_name: Optional[str] = None, experiment: Optional[str] = None, -) -> list[GoldStandard[ParsedGoldStandardType]]: - ... +) -> list[GoldStandard[ParsedGoldStandardType]]: ... def get_gold_standards( @@ -226,8 +224,7 @@ def get_gold_standard( question_short_name: Optional[str] = None, experiment: Optional[str] = None, model_type: None = None, -) -> Optional[GoldStandard[Any]]: - ... +) -> Optional[GoldStandard[Any]]: ... @overload @@ -237,8 +234,7 @@ def get_gold_standard( document_id: Optional[str] = None, question_short_name: Optional[str] = None, experiment: Optional[str] = None, -) -> Optional[GoldStandard[ParsedGoldStandardType]]: - ... +) -> Optional[GoldStandard[ParsedGoldStandardType]]: ... def get_gold_standard( diff --git a/ice/paper.py b/ice/paper.py index 8dcd9337..65526b3e 100644 --- a/ice/paper.py +++ b/ice/paper.py @@ -93,9 +93,9 @@ def parse_txt(file: Path) -> list[dict]: "number": section_title_number(current_section), } ], - "sectionType": "abstract" - if current_section == "Abstract" - else "main", + "sectionType": ( + "abstract" if current_section == "Abstract" else "main" + ), } ) return body diff --git a/ice/recipes/blinding_dynamic.py b/ice/recipes/blinding_dynamic.py index 31160bbe..3fd790e3 100644 --- a/ice/recipes/blinding_dynamic.py +++ b/ice/recipes/blinding_dynamic.py @@ -21,6 +21,7 @@ - routledge-2006.pdf - vittengl-2009.pdf """ + import itertools from typing import Any from typing import Literal @@ -344,9 +345,9 @@ async def run(self, paper: Paper): results_by_intervention: dict[str, dict[Group, dict[str, Any]]] = {} interventions = await self.interventions(paper) for intervention in interventions: - results_by_intervention[ - intervention - ] = await self.blinding_for_intervention(paper, intervention) + results_by_intervention[intervention] = ( + await self.blinding_for_intervention(paper, intervention) + ) recipe_results: list[RecipeResult] = [] for intervention in interventions: diff --git a/ice/recipes/consort_flow/baselines.py b/ice/recipes/consort_flow/baselines.py index 7dcac638..35ff22de 100644 --- a/ice/recipes/consort_flow/baselines.py +++ b/ice/recipes/consort_flow/baselines.py @@ -342,9 +342,11 @@ async def _all_options( except TooLongRequestError: selections = remove_lowest_perplexity(selections) return PaperQaAnswer( - answer=["The question is not answered in the text."] - if do_return_list - else "The question is not answered in the text.", + answer=( + ["The question is not answered in the text."] + if do_return_list + else "The question is not answered in the text." + ), support_candidates=texts, support_labels=[False for text in texts], support_scores=[t[1] for t in texts_with_perplexities], diff --git a/ice/recipes/consort_flow/golds.py b/ice/recipes/consort_flow/golds.py index 0fafae77..5670429a 100644 --- a/ice/recipes/consort_flow/golds.py +++ b/ice/recipes/consort_flow/golds.py @@ -54,9 +54,11 @@ def paper_to_allocation_gold_standards( ( f"The {exp.name} experiment included {len(exp.arms or [])} arms: {', '.join((arm.name for arm in exp.arms or []))}. How many participants were initially allocated to the {arm.name} arm of the {exp.name} experiment?", texts, - arm.allocated.quotes - if arm.allocated and isinstance(arm.allocated, SampleSize) - else [], + ( + arm.allocated.quotes + if arm.allocated and isinstance(arm.allocated, SampleSize) + else [] + ), ) for exp in gs.parsed_answer.experiments for arm in (exp.arms or []) diff --git a/ice/recipes/experiments_and_arms/recipes/best_passages.py b/ice/recipes/experiments_and_arms/recipes/best_passages.py index eded3d83..0546bd70 100644 --- a/ice/recipes/experiments_and_arms/recipes/best_passages.py +++ b/ice/recipes/experiments_and_arms/recipes/best_passages.py @@ -43,9 +43,11 @@ async def rank_passages_selector( ) # really small non-infinite number closest = min( samples, - key=lambda sample: abs(sample.final_answer - mean_score) - if sample.final_answer - else float("inf"), + key=lambda sample: ( + abs(sample.final_answer - mean_score) + if sample.final_answer + else float("inf") + ), ) return PassageWithReasoning( passage=closest.passage, @@ -154,9 +156,9 @@ async def score( sorted_answers = sorted( answers, - key=lambda prs: prs.final_answer - if prs.final_answer is not None - else float("-inf"), + key=lambda prs: ( + prs.final_answer if prs.final_answer is not None else float("-inf") + ), reverse=True, ) return sorted_answers diff --git a/ice/recipes/experiments_and_arms/recipes/name_experiments.py b/ice/recipes/experiments_and_arms/recipes/name_experiments.py index aa6f7736..086fa294 100644 --- a/ice/recipes/experiments_and_arms/recipes/name_experiments.py +++ b/ice/recipes/experiments_and_arms/recipes/name_experiments.py @@ -156,13 +156,15 @@ async def name_experiments( assert experiment_names.final_answer is not None return ( gs_names, - [ - strip_enumeration_prefix(exp_name) - for exp_name in standardized_answer.split("\n") - if exp_name.strip() - ] - if standardized_answer - else [], + ( + [ + strip_enumeration_prefix(exp_name) + for exp_name in standardized_answer.split("\n") + if exp_name.strip() + ] + if standardized_answer + else [] + ), paragraphs_to_keep, [str(p) for p in paragraphs], ) diff --git a/ice/recipes/meta/eval_paper_qa/common_baselines.py b/ice/recipes/meta/eval_paper_qa/common_baselines.py index 3b36d502..e2784647 100644 --- a/ice/recipes/meta/eval_paper_qa/common_baselines.py +++ b/ice/recipes/meta/eval_paper_qa/common_baselines.py @@ -197,9 +197,11 @@ async def preselected_few_shot_qa_baseline( Demonstration( question=g.question, texts=g.gold_support, - answer=g.gold_answer - if isinstance(g.gold_answer, str) - else numbered_list(g.gold_answer).transform(), + answer=( + g.gold_answer + if isinstance(g.gold_answer, str) + else numbered_list(g.gold_answer).transform() + ), ) for g in demonstration_examples ] diff --git a/ice/recipes/meta/eval_paper_qa/types.py b/ice/recipes/meta/eval_paper_qa/types.py index 8d128d32..672018a8 100644 --- a/ice/recipes/meta/eval_paper_qa/types.py +++ b/ice/recipes/meta/eval_paper_qa/types.py @@ -60,8 +60,7 @@ async def __call__( __paper: Paper, __question: str, __gold_support: Optional[Sequence[str]] = None, - ) -> PaperQaAnswer[AnswerType_contra]: - ... + ) -> PaperQaAnswer[AnswerType_contra]: ... class AnswerEvalMethod(Protocol[AnswerType_contra]): @@ -70,8 +69,7 @@ async def __call__( question: str, ground_truth: AnswerType_contra, prediction: AnswerType_contra, - ) -> tuple[bool, str]: - ... + ) -> tuple[bool, str]: ... class ClassificationEvalMethod(Protocol): @@ -81,5 +79,4 @@ async def __call__( predictions: Sequence[bool], ground_truth: Sequence[str], scores: Optional[Sequence[float]] = None, - ) -> BinaryClassificationMetrics: - ... + ) -> BinaryClassificationMetrics: ... diff --git a/ice/recipes/placebo_dialogs.py b/ice/recipes/placebo_dialogs.py index ce562956..c616d851 100644 --- a/ice/recipes/placebo_dialogs.py +++ b/ice/recipes/placebo_dialogs.py @@ -402,9 +402,11 @@ async def analyze_experiment(self, paper: Paper, experiment: Experiment): experiment=experiment, classifications=[ aggregate_used["answer"], - "Placebo" - if has_placebo_info - else "No placebo or placebo not mentioned", + ( + "Placebo" + if has_placebo_info + else "No placebo or placebo not mentioned" + ), ], answer=placebo_result, result=placebo_result, diff --git a/ice/recipes/placebo_keyword_baseline.py b/ice/recipes/placebo_keyword_baseline.py index d13ab3d5..8c19d94a 100644 --- a/ice/recipes/placebo_keyword_baseline.py +++ b/ice/recipes/placebo_keyword_baseline.py @@ -54,9 +54,11 @@ async def run(self, paper: Paper): result=f"{placebo_answer.classification}: {placebo_answer.sentence}", answer=f"{placebo_answer.classification}: {placebo_answer.sentence}", classifications=[ - "Placebo" - if placebo_answer.classification == "Placebo" - else "No placebo or placebo not mentioned", + ( + "Placebo" + if placebo_answer.classification == "Placebo" + else "No placebo or placebo not mentioned" + ), placebo_answer.classification, ], excerpts=[placebo_answer.sentence], diff --git a/ice/recipes/primer/answer_by_dispatch/types.py b/ice/recipes/primer/answer_by_dispatch/types.py index 2277cb2e..d1abe9fa 100644 --- a/ice/recipes/primer/answer_by_dispatch/types.py +++ b/ice/recipes/primer/answer_by_dispatch/types.py @@ -7,8 +7,7 @@ class QuestionRecipe(Protocol): - async def __call__(self, question: str) -> str: - ... + async def __call__(self, question: str) -> str: ... @dataclass diff --git a/ice/recipes/primer/sequential_action.py b/ice/recipes/primer/sequential_action.py index 50b8a840..97450d42 100644 --- a/ice/recipes/primer/sequential_action.py +++ b/ice/recipes/primer/sequential_action.py @@ -82,16 +82,13 @@ async def answer_directly(question: str, log: Log) -> str: class Action(ABC): @classmethod @abstractmethod - async def propose(cls, question: str, log: Log, max_actions: int) -> "Action": - ... + async def propose(cls, question: str, log: Log, max_actions: int) -> "Action": ... @abstractmethod - def run(self): - ... + def run(self): ... @abstractmethod - def make_log_entry(self, result: str) -> str: - ... + def make_log_entry(self, result: str) -> str: ... @dataclass diff --git a/ice/recipes/program_search/nodes/decontext/prompts.py b/ice/recipes/program_search/nodes/decontext/prompts.py index 6ac8d7d6..6aab54c9 100644 --- a/ice/recipes/program_search/nodes/decontext/prompts.py +++ b/ice/recipes/program_search/nodes/decontext/prompts.py @@ -83,12 +83,16 @@ def decontext_prompt( if questions: last_example["questions"] = numbered_list(questions) examples = format_multi( - QUESTION_GUIDED_EXAMPLE_TEMPLATE - if questions - else QUESTION_FREE_EXAMPLE_TEMPLATE, - QUESTION_GUIDED_EXAMPLES + [last_example] - if questions - else QUESTION_FREE_EXAMPLES + [last_example], + ( + QUESTION_GUIDED_EXAMPLE_TEMPLATE + if questions + else QUESTION_FREE_EXAMPLE_TEMPLATE + ), + ( + QUESTION_GUIDED_EXAMPLES + [last_example] + if questions + else QUESTION_FREE_EXAMPLES + [last_example] + ), ) return "\n\n".join( ( diff --git a/ice/recipes/program_search/nodes/select/prompts.py b/ice/recipes/program_search/nodes/select/prompts.py index e2c0dad1..3459dfce 100644 --- a/ice/recipes/program_search/nodes/select/prompts.py +++ b/ice/recipes/program_search/nodes/select/prompts.py @@ -114,9 +114,11 @@ def render_selection_example( question=question, existing=numbered_list(example.existing) if example.existing else NO_EXISTING, texts=numbered_list([str(text) for text in example.selection]), - selections=NONE_ANSWER - if not example.positive_idxs - else str(example.positive_idxs[0] + 1), + selections=( + NONE_ANSWER + if not example.positive_idxs + else str(example.positive_idxs[0] + 1) + ), NONE_ANSWER=NONE_ANSWER, ) diff --git a/ice/recipes/program_search/nodes/select/select.py b/ice/recipes/program_search/nodes/select/select.py index 7107cf5a..5fc77101 100644 --- a/ice/recipes/program_search/nodes/select/select.py +++ b/ice/recipes/program_search/nodes/select/select.py @@ -305,9 +305,11 @@ async def select_using_elicit_prompt_few_shot( if include_negative: demonstrations_or_none = [ - (await elicit_negative_few_shot_example(example, max_examples=1)) - if idx % 3 == 0 # more positive than negative examples - else (await positive_few_shot_example(example, max_examples=1)) + ( + (await elicit_negative_few_shot_example(example, max_examples=1)) + if idx % 3 == 0 # more positive than negative examples + else (await positive_few_shot_example(example, max_examples=1)) + ) for idx, example in enumerate(examples) ] else: diff --git a/ice/recipes/program_search/types.py b/ice/recipes/program_search/types.py index 928251ba..1d1fe3ec 100644 --- a/ice/recipes/program_search/types.py +++ b/ice/recipes/program_search/types.py @@ -75,8 +75,7 @@ class Trace(BaseModel): T = t.TypeVar("T") -class Beam(GenericModel, t.Generic[T]): - ... +class Beam(GenericModel, t.Generic[T]): ... def remove_lowest_perplexity(results: t.Sequence[tuple[str, float]]): diff --git a/ice/recipes/program_search/utils/find_examples.py b/ice/recipes/program_search/utils/find_examples.py index 193e5bf2..abc0def3 100644 --- a/ice/recipes/program_search/utils/find_examples.py +++ b/ice/recipes/program_search/utils/find_examples.py @@ -77,9 +77,9 @@ async def rouge_distractor_scores( scores = await rouge_texts(hypotheses=hypotheses, references=references) return { text: ( - lambda s: s.rouge_l.r - if s.rouge_l.r < lcs_threshold and s.rouge_3.r == 0 - else 0 + lambda s: ( + s.rouge_l.r if s.rouge_l.r < lcs_threshold and s.rouge_3.r == 0 else 0 + ) )(score) for text, score in scores.items() } diff --git a/ice/trace.py b/ice/trace.py index 33a28a29..d9cb8b2e 100644 --- a/ice/trace.py +++ b/ice/trace.py @@ -309,8 +309,7 @@ def __new__(mcls, name, bases, namespace): ) -class TracedABC(metaclass=TracedABCMeta): - ... +class TracedABC(metaclass=TracedABCMeta): ... # TODO this and the functions it calls needs to be replaced with a better system