From a62351deccca5340e85d9b6453031a052a02c8cc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Jul 2024 18:00:18 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ice/agent.py                                  |  3 +--
 ice/cache.py                                  |  1 +
 ice/evaluation/evaluate_recipe_result.py      | 20 ++++++++++---------
 ice/evaluation/evaluation_report.py           | 18 ++++++++---------
 ice/evaluation/summarize_experiment_evals.py  | 16 +++++++++------
 ice/metrics/gold_paragraphs.py                |  1 +
 ice/metrics/gold_standards.py                 | 12 ++++-------
 ice/paper.py                                  |  6 +++---
 ice/recipes/blinding_dynamic.py               |  7 ++++---
 ice/recipes/consort_flow/baselines.py         |  8 +++++---
 ice/recipes/consort_flow/golds.py             |  8 +++++---
 .../recipes/best_passages.py                  | 14 +++++++------
 .../recipes/name_experiments.py               | 16 ++++++++-------
 .../meta/eval_paper_qa/common_baselines.py    |  8 +++++---
 ice/recipes/meta/eval_paper_qa/types.py       |  9 +++------
 ice/recipes/placebo_dialogs.py                |  8 +++++---
 ice/recipes/placebo_keyword_baseline.py       |  8 +++++---
 .../primer/answer_by_dispatch/types.py        |  3 +--
 ice/recipes/primer/sequential_action.py       |  9 +++------
 .../program_search/nodes/decontext/prompts.py | 16 +++++++++------
 .../program_search/nodes/select/prompts.py    |  8 +++++---
 .../program_search/nodes/select/select.py     |  8 +++++---
 ice/recipes/program_search/types.py           |  3 +--
 .../program_search/utils/find_examples.py     |  6 +++---
 ice/trace.py                                  |  3 +--
 25 files changed, 118 insertions(+), 101 deletions(-)

diff --git a/ice/agent.py b/ice/agent.py
index ae3c6b6e..7ee5a951 100644
--- a/ice/agent.py
+++ b/ice/agent.py
@@ -21,8 +21,7 @@
 except ImportError:
 
     class Tfew(Agent):
-        def __init__(self, *args, **kwargs):
-            ...
+        def __init__(self, *args, **kwargs): ...
 
 
 MACHINE_AGENTS = {
diff --git a/ice/cache.py b/ice/cache.py
index 4a72ee6f..60936cc5 100644
--- a/ice/cache.py
+++ b/ice/cache.py
@@ -1,6 +1,7 @@
 """
 Decorator for caching function results to disk
 """
+
 import asyncio
 import functools
 import inspect
diff --git a/ice/evaluation/evaluate_recipe_result.py b/ice/evaluation/evaluate_recipe_result.py
index 824ea578..6464dea5 100644
--- a/ice/evaluation/evaluate_recipe_result.py
+++ b/ice/evaluation/evaluate_recipe_result.py
@@ -138,9 +138,7 @@ def __str__(self) -> str:
         correctness = (
             "Correct"
             if self.is_correct == True
-            else "Incorrect"
-            if self.is_correct == False
-            else "Not evaluated"
+            else "Incorrect" if self.is_correct == False else "Not evaluated"
         )
         return f"""{correctness}.
     - Predicted: {self.predicted}
@@ -196,13 +194,17 @@ def evaluated_classifications(self) -> list[EvaluatedClassification]:
 
         for i in range(0, max(len(recipe_classifications), len(gold_classifications))):
             evaluated_classification = EvaluatedClassification(
-                predicted=recipe_classifications[i]
-                if i < len(recipe_classifications)
-                else None,
+                predicted=(
+                    recipe_classifications[i]
+                    if i < len(recipe_classifications)
+                    else None
+                ),
                 gold=gold_classifications[i] if i < len(gold_classifications) else None,
-                classification_eq=self.classification_eq[i]
-                if i < len(self.classification_eq)
-                else None,
+                classification_eq=(
+                    self.classification_eq[i]
+                    if i < len(self.classification_eq)
+                    else None
+                ),
             )
 
             evaluated_classifications.append(evaluated_classification)
diff --git a/ice/evaluation/evaluation_report.py b/ice/evaluation/evaluation_report.py
index c41d1791..34727bae 100644
--- a/ice/evaluation/evaluation_report.py
+++ b/ice/evaluation/evaluation_report.py
@@ -382,9 +382,9 @@ def make_dashboard_row_df(self):
                 classification_summary.proportion_correct
             )
 
-            row[
-                f"Classification {i+1} # evaluated"
-            ] = classification_summary.num_evaluated
+            row[f"Classification {i+1} # evaluated"] = (
+                classification_summary.num_evaluated
+            )
 
         df = pd.DataFrame([row])
         df.to_csv(
@@ -408,9 +408,9 @@ def make_experiments_evaluation_df(self):
                 "ice_commit": latest_commit_hash(),
                 "document_id": result.document_id,
                 "split": result.gold_standard.split if result.gold_standard else None,
-                "experiment": result.gold_standard.experiment
-                if result.gold_standard
-                else None,
+                "experiment": (
+                    result.gold_standard.experiment if result.gold_standard else None
+                ),
                 "total_gs_quotes": len(
                     result.evaluated_excerpts.gold_standards_in_excerpts_results
                 ),
@@ -420,9 +420,9 @@ def make_experiments_evaluation_df(self):
                 "excerpts": result.evaluated_excerpts.excerpts,
                 "gs_quotes": result.evaluated_excerpts.gold_standards_str(),
                 "answer": result.answer,
-                "gs_answer": result.gold_standard.answer
-                if result.gold_standard
-                else None,
+                "gs_answer": (
+                    result.gold_standard.answer if result.gold_standard else None
+                ),
                 "answer_rating": result.answer_rating,
                 "failure_modes": result.failure_modes,
             }
diff --git a/ice/evaluation/summarize_experiment_evals.py b/ice/evaluation/summarize_experiment_evals.py
index 7079baa1..11683d78 100644
--- a/ice/evaluation/summarize_experiment_evals.py
+++ b/ice/evaluation/summarize_experiment_evals.py
@@ -35,13 +35,17 @@ async def summarize_experiment_evals(results_file: str):
                     row.get("classification_1"),
                     row.get("classification_2"),
                 ],
-                answer_rating=None
-                if pd.isna(row.get("answer_rating"))
-                else int(row.get("answer_rating")),
+                answer_rating=(
+                    None
+                    if pd.isna(row.get("answer_rating"))
+                    else int(row.get("answer_rating"))
+                ),
                 elicit_commit=row.get("elicit_commit"),
-                failure_modes=None
-                if pd.isna(row.get("failure_modes"))
-                else row.failure_modes.split(","),
+                failure_modes=(
+                    None
+                    if pd.isna(row.get("failure_modes"))
+                    else row.failure_modes.split(",")
+                ),
             )
             for _, row in recipe_df.iterrows()
         ]
diff --git a/ice/metrics/gold_paragraphs.py b/ice/metrics/gold_paragraphs.py
index 9e5d85e6..792701d7 100644
--- a/ice/metrics/gold_paragraphs.py
+++ b/ice/metrics/gold_paragraphs.py
@@ -1,6 +1,7 @@
 """
 Make a dataframe that contains the paragraphs that contain the gold standard quotes.
 """
+
 import asyncio
 from pathlib import Path
 from typing import Optional
diff --git a/ice/metrics/gold_standards.py b/ice/metrics/gold_standards.py
index 7c0e43d6..96e8ee89 100644
--- a/ice/metrics/gold_standards.py
+++ b/ice/metrics/gold_standards.py
@@ -184,8 +184,7 @@ def get_gold_standards(
     question_short_name: Optional[str] = None,
     experiment: Optional[str] = None,
     model_type: None = None,
-) -> list[GoldStandard[Any]]:
-    ...
+) -> list[GoldStandard[Any]]: ...
 
 
 @overload
@@ -195,8 +194,7 @@ def get_gold_standards(
     document_id: Optional[str] = None,
     question_short_name: Optional[str] = None,
     experiment: Optional[str] = None,
-) -> list[GoldStandard[ParsedGoldStandardType]]:
-    ...
+) -> list[GoldStandard[ParsedGoldStandardType]]: ...
 
 
 def get_gold_standards(
@@ -226,8 +224,7 @@ def get_gold_standard(
     question_short_name: Optional[str] = None,
     experiment: Optional[str] = None,
     model_type: None = None,
-) -> Optional[GoldStandard[Any]]:
-    ...
+) -> Optional[GoldStandard[Any]]: ...
 
 
 @overload
@@ -237,8 +234,7 @@ def get_gold_standard(
     document_id: Optional[str] = None,
     question_short_name: Optional[str] = None,
     experiment: Optional[str] = None,
-) -> Optional[GoldStandard[ParsedGoldStandardType]]:
-    ...
+) -> Optional[GoldStandard[ParsedGoldStandardType]]: ...
 
 
 def get_gold_standard(
diff --git a/ice/paper.py b/ice/paper.py
index 8dcd9337..65526b3e 100644
--- a/ice/paper.py
+++ b/ice/paper.py
@@ -93,9 +93,9 @@ def parse_txt(file: Path) -> list[dict]:
                                 "number": section_title_number(current_section),
                             }
                         ],
-                        "sectionType": "abstract"
-                        if current_section == "Abstract"
-                        else "main",
+                        "sectionType": (
+                            "abstract" if current_section == "Abstract" else "main"
+                        ),
                     }
                 )
     return body
diff --git a/ice/recipes/blinding_dynamic.py b/ice/recipes/blinding_dynamic.py
index 31160bbe..3fd790e3 100644
--- a/ice/recipes/blinding_dynamic.py
+++ b/ice/recipes/blinding_dynamic.py
@@ -21,6 +21,7 @@
 - routledge-2006.pdf
 - vittengl-2009.pdf
 """
+
 import itertools
 from typing import Any
 from typing import Literal
@@ -344,9 +345,9 @@ async def run(self, paper: Paper):
         results_by_intervention: dict[str, dict[Group, dict[str, Any]]] = {}
         interventions = await self.interventions(paper)
         for intervention in interventions:
-            results_by_intervention[
-                intervention
-            ] = await self.blinding_for_intervention(paper, intervention)
+            results_by_intervention[intervention] = (
+                await self.blinding_for_intervention(paper, intervention)
+            )
 
         recipe_results: list[RecipeResult] = []
         for intervention in interventions:
diff --git a/ice/recipes/consort_flow/baselines.py b/ice/recipes/consort_flow/baselines.py
index 7dcac638..35ff22de 100644
--- a/ice/recipes/consort_flow/baselines.py
+++ b/ice/recipes/consort_flow/baselines.py
@@ -342,9 +342,11 @@ async def _all_options(
         except TooLongRequestError:
             selections = remove_lowest_perplexity(selections)
     return PaperQaAnswer(
-        answer=["The question is not answered in the text."]
-        if do_return_list
-        else "The question is not answered in the text.",
+        answer=(
+            ["The question is not answered in the text."]
+            if do_return_list
+            else "The question is not answered in the text."
+        ),
         support_candidates=texts,
         support_labels=[False for text in texts],
         support_scores=[t[1] for t in texts_with_perplexities],
diff --git a/ice/recipes/consort_flow/golds.py b/ice/recipes/consort_flow/golds.py
index 0fafae77..5670429a 100644
--- a/ice/recipes/consort_flow/golds.py
+++ b/ice/recipes/consort_flow/golds.py
@@ -54,9 +54,11 @@ def paper_to_allocation_gold_standards(
         (
             f"The {exp.name} experiment included {len(exp.arms or [])} arms: {', '.join((arm.name for arm in exp.arms or []))}. How many participants were initially allocated to the {arm.name} arm of the {exp.name} experiment?",
             texts,
-            arm.allocated.quotes
-            if arm.allocated and isinstance(arm.allocated, SampleSize)
-            else [],
+            (
+                arm.allocated.quotes
+                if arm.allocated and isinstance(arm.allocated, SampleSize)
+                else []
+            ),
         )
         for exp in gs.parsed_answer.experiments
         for arm in (exp.arms or [])
diff --git a/ice/recipes/experiments_and_arms/recipes/best_passages.py b/ice/recipes/experiments_and_arms/recipes/best_passages.py
index eded3d83..0546bd70 100644
--- a/ice/recipes/experiments_and_arms/recipes/best_passages.py
+++ b/ice/recipes/experiments_and_arms/recipes/best_passages.py
@@ -43,9 +43,11 @@ async def rank_passages_selector(
     )  # really small non-infinite number
     closest = min(
         samples,
-        key=lambda sample: abs(sample.final_answer - mean_score)
-        if sample.final_answer
-        else float("inf"),
+        key=lambda sample: (
+            abs(sample.final_answer - mean_score)
+            if sample.final_answer
+            else float("inf")
+        ),
     )
     return PassageWithReasoning(
         passage=closest.passage,
@@ -154,9 +156,9 @@ async def score(
 
         sorted_answers = sorted(
             answers,
-            key=lambda prs: prs.final_answer
-            if prs.final_answer is not None
-            else float("-inf"),
+            key=lambda prs: (
+                prs.final_answer if prs.final_answer is not None else float("-inf")
+            ),
             reverse=True,
         )
         return sorted_answers
diff --git a/ice/recipes/experiments_and_arms/recipes/name_experiments.py b/ice/recipes/experiments_and_arms/recipes/name_experiments.py
index aa6f7736..086fa294 100644
--- a/ice/recipes/experiments_and_arms/recipes/name_experiments.py
+++ b/ice/recipes/experiments_and_arms/recipes/name_experiments.py
@@ -156,13 +156,15 @@ async def name_experiments(
     assert experiment_names.final_answer is not None
     return (
         gs_names,
-        [
-            strip_enumeration_prefix(exp_name)
-            for exp_name in standardized_answer.split("\n")
-            if exp_name.strip()
-        ]
-        if standardized_answer
-        else [],
+        (
+            [
+                strip_enumeration_prefix(exp_name)
+                for exp_name in standardized_answer.split("\n")
+                if exp_name.strip()
+            ]
+            if standardized_answer
+            else []
+        ),
         paragraphs_to_keep,
         [str(p) for p in paragraphs],
     )
diff --git a/ice/recipes/meta/eval_paper_qa/common_baselines.py b/ice/recipes/meta/eval_paper_qa/common_baselines.py
index 3b36d502..e2784647 100644
--- a/ice/recipes/meta/eval_paper_qa/common_baselines.py
+++ b/ice/recipes/meta/eval_paper_qa/common_baselines.py
@@ -197,9 +197,11 @@ async def preselected_few_shot_qa_baseline(
         Demonstration(
             question=g.question,
             texts=g.gold_support,
-            answer=g.gold_answer
-            if isinstance(g.gold_answer, str)
-            else numbered_list(g.gold_answer).transform(),
+            answer=(
+                g.gold_answer
+                if isinstance(g.gold_answer, str)
+                else numbered_list(g.gold_answer).transform()
+            ),
         )
         for g in demonstration_examples
     ]
diff --git a/ice/recipes/meta/eval_paper_qa/types.py b/ice/recipes/meta/eval_paper_qa/types.py
index 8d128d32..672018a8 100644
--- a/ice/recipes/meta/eval_paper_qa/types.py
+++ b/ice/recipes/meta/eval_paper_qa/types.py
@@ -60,8 +60,7 @@ async def __call__(
         __paper: Paper,
         __question: str,
         __gold_support: Optional[Sequence[str]] = None,
-    ) -> PaperQaAnswer[AnswerType_contra]:
-        ...
+    ) -> PaperQaAnswer[AnswerType_contra]: ...
 
 
 class AnswerEvalMethod(Protocol[AnswerType_contra]):
@@ -70,8 +69,7 @@ async def __call__(
         question: str,
         ground_truth: AnswerType_contra,
         prediction: AnswerType_contra,
-    ) -> tuple[bool, str]:
-        ...
+    ) -> tuple[bool, str]: ...
 
 
 class ClassificationEvalMethod(Protocol):
@@ -81,5 +79,4 @@ async def __call__(
         predictions: Sequence[bool],
         ground_truth: Sequence[str],
         scores: Optional[Sequence[float]] = None,
-    ) -> BinaryClassificationMetrics:
-        ...
+    ) -> BinaryClassificationMetrics: ...
diff --git a/ice/recipes/placebo_dialogs.py b/ice/recipes/placebo_dialogs.py
index ce562956..c616d851 100644
--- a/ice/recipes/placebo_dialogs.py
+++ b/ice/recipes/placebo_dialogs.py
@@ -402,9 +402,11 @@ async def analyze_experiment(self, paper: Paper, experiment: Experiment):
                     experiment=experiment,
                     classifications=[
                         aggregate_used["answer"],
-                        "Placebo"
-                        if has_placebo_info
-                        else "No placebo or placebo not mentioned",
+                        (
+                            "Placebo"
+                            if has_placebo_info
+                            else "No placebo or placebo not mentioned"
+                        ),
                     ],
                     answer=placebo_result,
                     result=placebo_result,
diff --git a/ice/recipes/placebo_keyword_baseline.py b/ice/recipes/placebo_keyword_baseline.py
index d13ab3d5..8c19d94a 100644
--- a/ice/recipes/placebo_keyword_baseline.py
+++ b/ice/recipes/placebo_keyword_baseline.py
@@ -54,9 +54,11 @@ async def run(self, paper: Paper):
                     result=f"{placebo_answer.classification}: {placebo_answer.sentence}",
                     answer=f"{placebo_answer.classification}: {placebo_answer.sentence}",
                     classifications=[
-                        "Placebo"
-                        if placebo_answer.classification == "Placebo"
-                        else "No placebo or placebo not mentioned",
+                        (
+                            "Placebo"
+                            if placebo_answer.classification == "Placebo"
+                            else "No placebo or placebo not mentioned"
+                        ),
                         placebo_answer.classification,
                     ],
                     excerpts=[placebo_answer.sentence],
diff --git a/ice/recipes/primer/answer_by_dispatch/types.py b/ice/recipes/primer/answer_by_dispatch/types.py
index 2277cb2e..d1abe9fa 100644
--- a/ice/recipes/primer/answer_by_dispatch/types.py
+++ b/ice/recipes/primer/answer_by_dispatch/types.py
@@ -7,8 +7,7 @@
 
 
 class QuestionRecipe(Protocol):
-    async def __call__(self, question: str) -> str:
-        ...
+    async def __call__(self, question: str) -> str: ...
 
 
 @dataclass
diff --git a/ice/recipes/primer/sequential_action.py b/ice/recipes/primer/sequential_action.py
index 50b8a840..97450d42 100644
--- a/ice/recipes/primer/sequential_action.py
+++ b/ice/recipes/primer/sequential_action.py
@@ -82,16 +82,13 @@ async def answer_directly(question: str, log: Log) -> str:
 class Action(ABC):
     @classmethod
     @abstractmethod
-    async def propose(cls, question: str, log: Log, max_actions: int) -> "Action":
-        ...
+    async def propose(cls, question: str, log: Log, max_actions: int) -> "Action": ...
 
     @abstractmethod
-    def run(self):
-        ...
+    def run(self): ...
 
     @abstractmethod
-    def make_log_entry(self, result: str) -> str:
-        ...
+    def make_log_entry(self, result: str) -> str: ...
 
 
 @dataclass
diff --git a/ice/recipes/program_search/nodes/decontext/prompts.py b/ice/recipes/program_search/nodes/decontext/prompts.py
index 6ac8d7d6..6aab54c9 100644
--- a/ice/recipes/program_search/nodes/decontext/prompts.py
+++ b/ice/recipes/program_search/nodes/decontext/prompts.py
@@ -83,12 +83,16 @@ def decontext_prompt(
     if questions:
         last_example["questions"] = numbered_list(questions)
     examples = format_multi(
-        QUESTION_GUIDED_EXAMPLE_TEMPLATE
-        if questions
-        else QUESTION_FREE_EXAMPLE_TEMPLATE,
-        QUESTION_GUIDED_EXAMPLES + [last_example]
-        if questions
-        else QUESTION_FREE_EXAMPLES + [last_example],
+        (
+            QUESTION_GUIDED_EXAMPLE_TEMPLATE
+            if questions
+            else QUESTION_FREE_EXAMPLE_TEMPLATE
+        ),
+        (
+            QUESTION_GUIDED_EXAMPLES + [last_example]
+            if questions
+            else QUESTION_FREE_EXAMPLES + [last_example]
+        ),
     )
     return "\n\n".join(
         (
diff --git a/ice/recipes/program_search/nodes/select/prompts.py b/ice/recipes/program_search/nodes/select/prompts.py
index e2c0dad1..3459dfce 100644
--- a/ice/recipes/program_search/nodes/select/prompts.py
+++ b/ice/recipes/program_search/nodes/select/prompts.py
@@ -114,9 +114,11 @@ def render_selection_example(
         question=question,
         existing=numbered_list(example.existing) if example.existing else NO_EXISTING,
         texts=numbered_list([str(text) for text in example.selection]),
-        selections=NONE_ANSWER
-        if not example.positive_idxs
-        else str(example.positive_idxs[0] + 1),
+        selections=(
+            NONE_ANSWER
+            if not example.positive_idxs
+            else str(example.positive_idxs[0] + 1)
+        ),
         NONE_ANSWER=NONE_ANSWER,
     )
 
diff --git a/ice/recipes/program_search/nodes/select/select.py b/ice/recipes/program_search/nodes/select/select.py
index 7107cf5a..5fc77101 100644
--- a/ice/recipes/program_search/nodes/select/select.py
+++ b/ice/recipes/program_search/nodes/select/select.py
@@ -305,9 +305,11 @@ async def select_using_elicit_prompt_few_shot(
 
     if include_negative:
         demonstrations_or_none = [
-            (await elicit_negative_few_shot_example(example, max_examples=1))
-            if idx % 3 == 0  # more positive than negative examples
-            else (await positive_few_shot_example(example, max_examples=1))
+            (
+                (await elicit_negative_few_shot_example(example, max_examples=1))
+                if idx % 3 == 0  # more positive than negative examples
+                else (await positive_few_shot_example(example, max_examples=1))
+            )
             for idx, example in enumerate(examples)
         ]
     else:
diff --git a/ice/recipes/program_search/types.py b/ice/recipes/program_search/types.py
index 928251ba..1d1fe3ec 100644
--- a/ice/recipes/program_search/types.py
+++ b/ice/recipes/program_search/types.py
@@ -75,8 +75,7 @@ class Trace(BaseModel):
 T = t.TypeVar("T")
 
 
-class Beam(GenericModel, t.Generic[T]):
-    ...
+class Beam(GenericModel, t.Generic[T]): ...
 
 
 def remove_lowest_perplexity(results: t.Sequence[tuple[str, float]]):
diff --git a/ice/recipes/program_search/utils/find_examples.py b/ice/recipes/program_search/utils/find_examples.py
index 193e5bf2..abc0def3 100644
--- a/ice/recipes/program_search/utils/find_examples.py
+++ b/ice/recipes/program_search/utils/find_examples.py
@@ -77,9 +77,9 @@ async def rouge_distractor_scores(
     scores = await rouge_texts(hypotheses=hypotheses, references=references)
     return {
         text: (
-            lambda s: s.rouge_l.r
-            if s.rouge_l.r < lcs_threshold and s.rouge_3.r == 0
-            else 0
+            lambda s: (
+                s.rouge_l.r if s.rouge_l.r < lcs_threshold and s.rouge_3.r == 0 else 0
+            )
         )(score)
         for text, score in scores.items()
     }
diff --git a/ice/trace.py b/ice/trace.py
index 33a28a29..d9cb8b2e 100644
--- a/ice/trace.py
+++ b/ice/trace.py
@@ -309,8 +309,7 @@ def __new__(mcls, name, bases, namespace):
         )
 
 
-class TracedABC(metaclass=TracedABCMeta):
-    ...
+class TracedABC(metaclass=TracedABCMeta): ...
 
 
 # TODO this and the functions it calls needs to be replaced with a better system