From 6d17be1fa4912b32e3949f06aa7150f33f0a1b32 Mon Sep 17 00:00:00 2001
From: Bagatur
Date: Mon, 23 Dec 2024 18:43:34 -0500
Subject: [PATCH] fmt

---
 python/langsmith/evaluation/_arunner.py  | 17 ++++++-----------
 python/langsmith/evaluation/_runner.py   | 22 ++++++++++------------
 python/langsmith/evaluation/evaluator.py | 14 +++++++++-----
 3 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 8dea802cd..80ba8db17 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -689,9 +689,7 @@ async def awith_summary_evaluators(
         summary_evaluators: Sequence[SUMMARY_EVALUATOR_T],
     ) -> _AsyncExperimentManager:
         wrapped_evaluators = _wrap_summary_evaluators(summary_evaluators)
-        aggregate_feedback_gen = self._aapply_summary_evaluators(
-            wrapped_evaluators, [r async for r in self.aget_results()]
-        )
+        aggregate_feedback_gen = self._aapply_summary_evaluators(wrapped_evaluators)
         return _AsyncExperimentManager(
             await self.aget_examples(),
             experiment=self._experiment,
@@ -858,15 +856,12 @@ async def _arun_evaluators(
     async def _aapply_summary_evaluators(
         self,
         summary_evaluators: Sequence[SUMMARY_EVALUATOR_T],
-        evaluation_results: List[schemas.ExperimentResultRow],
     ) -> AsyncIterator[EvaluationResults]:
-        runs, examples = [], []
-        async_examples = aitertools.ensure_async_iterator(await self.aget_examples())
-        async for run, example in aitertools.async_zip(
-            self.aget_runs(), async_examples
-        ):
-            runs.append(run)
-            examples.append(example)
+        runs, examples, evaluation_results = [], [], []
+        async for row in self.aget_results():
+            runs.append(row["run"])
+            examples.append(row["example"])
+            evaluation_results.append(row["evaluation_results"]["results"])
         aggregate_feedback = []
         project_id = self._get_experiment().id if self._upload_results else None
         current_context = rh.get_tracing_context()
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 06ae64dab..4e9970a95 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1454,9 +1454,7 @@ def with_summary_evaluators(
         wrapped_evaluators = _wrap_summary_evaluators(summary_evaluators)
         context = copy_context()
         aggregate_feedback_gen = context.run(
-            self._apply_summary_evaluators,
-            wrapped_evaluators,
-            [r for r in self.get_results()],
+            self._apply_summary_evaluators, wrapped_evaluators
         )
         return _ExperimentManager(
             self.examples,
@@ -1670,12 +1668,12 @@ def _score(
     def _apply_summary_evaluators(
         self,
         summary_evaluators: Sequence[SUMMARY_EVALUATOR_T],
-        evaluation_results: List[schemas.ExperimentResultRow],
     ) -> Generator[EvaluationResults, None, None]:
-        runs, examples = [], []
-        for run, example in zip(self.runs, self.examples):
-            runs.append(run)
-            examples.append(example)
+        runs, examples, evaluation_results = [], [], []
+        for row in self.get_results():
+            runs.append(row["run"])
+            examples.append(row["example"])
+            evaluation_results.append(row["evaluation_results"]["results"])
         aggregate_feedback = []
         with ls_utils.ContextThreadPoolExecutor() as executor:
             project_id = self._get_experiment().id if self._upload_results else None
@@ -1794,15 +1792,15 @@ def _wrap(evaluator: SUMMARY_EVALUATOR_T) -> SUMMARY_EVALUATOR_T:
 
     @functools.wraps(evaluator)
     def _wrapper_inner(
-        runs: Sequence[schemas.Run],
-        examples: Sequence[schemas.Example],
-        evaluation_results: Sequence[schemas.ExperimentResultRow],
+        runs: list[schemas.Run],
+        examples: list[schemas.Example],
+        evaluation_results: list[list[EvaluationResult]],
     ) -> Union[EvaluationResult, EvaluationResults]:
         @rh.traceable(name=eval_name)
         def _wrapper_super_inner(
             runs_: str, examples_: str, evaluation_results_: str
         ) -> Union[EvaluationResult, EvaluationResults]:
-            return evaluator(list(runs), list(examples), list(evaluation_results))
+            return evaluator(runs, examples, evaluation_results)
 
         return _wrapper_super_inner(
             f"Runs[] (Length={len(runs)})",
diff --git a/python/langsmith/evaluation/evaluator.py b/python/langsmith/evaluation/evaluator.py
index e62db709a..e732c0dd7 100644
--- a/python/langsmith/evaluation/evaluator.py
+++ b/python/langsmith/evaluation/evaluator.py
@@ -726,12 +726,16 @@ def _format_evaluator_result(
         [
             Sequence[schemas.Run],
             Sequence[schemas.Example],
-            Sequence[ExperimentResultRow],
+            Sequence[Sequence[schemas.EvaluationResult]],
         ],
         Union[EvaluationResult, EvaluationResults],
     ],
     Callable[
-        [List[schemas.Run], List[schemas.Example], List[ExperimentResultRow]],
+        [
+            List[schemas.Run],
+            List[schemas.Example],
+            List[List[schemas.EvaluationResult]],
+        ],
         Union[EvaluationResult, EvaluationResults],
     ],
 ]
@@ -772,7 +776,7 @@ def _normalize_summary_evaluator(func: Callable) -> SUMMARY_EVALUATOR_T:
         def wrapper(
             runs: Sequence[schemas.Run],
             examples: Sequence[schemas.Example],
-            evaluation_results: Sequence[ExperimentResultRow],
+            _: Sequence[Sequence[EvaluationResult]],
         ) -> Union[EvaluationResult, EvaluationResults]:
             result = func(runs, examples)
             if isinstance(result, EvaluationResult):
@@ -785,10 +789,10 @@ def _normalize_summary_evaluator(func: Callable) -> SUMMARY_EVALUATOR_T:
         return wrapper  # type: ignore[return-value]
     else:
 
-        def wrapper(
+        def wrapper(  # type: ignore[misc]
             runs: Sequence[schemas.Run],
             examples: Sequence[schemas.Example],
-            evaluation_results: Sequence[ExperimentResultRow],
+            evaluation_results: Sequence[Sequence[EvaluationResult]],
         ) -> Union[EvaluationResult, EvaluationResults]:
             arg_map = {
                 "runs": runs,
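
For reference, a summary evaluator written directly against the new signature
might look like the sketch below: the third argument now carries each row's
evaluator feedback as list[list[EvaluationResult]] rather than full
ExperimentResultRow dicts. The "correctness" feedback key and the pass_rate
metric name are illustrative assumptions, not part of this patch.

    from langsmith import schemas
    from langsmith.evaluation import EvaluationResult


    def pass_rate(
        runs: list[schemas.Run],
        examples: list[schemas.Example],
        evaluation_results: list[list[EvaluationResult]],
    ) -> EvaluationResult:
        # Each inner list holds the row-level feedback for one run/example pair.
        scores = [
            result.score
            for row_results in evaluation_results
            for result in row_results
            # "correctness" is a hypothetical feedback key used for illustration.
            if result.key == "correctness" and result.score is not None
        ]
        rate = sum(scores) / len(scores) if scores else 0.0
        return EvaluationResult(key="pass_rate", score=rate)

Such a function would be passed as before via summary_evaluators=[pass_rate] to
evaluate()/aevaluate(); existing two-argument summary evaluators keep working
through the _normalize_summary_evaluator shim above, which ignores the new
third argument.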