diff --git a/python/docs/create_api_rst.py b/python/docs/create_api_rst.py index 2be0a1525..83fbc4757 100644 --- a/python/docs/create_api_rst.py +++ b/python/docs/create_api_rst.py @@ -105,7 +105,9 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers: else ( "enum" if issubclass(type_, Enum) - else "Pydantic" if issubclass(type_, BaseModel) else "Regular" + else "Pydantic" + if issubclass(type_, BaseModel) + else "Regular" ) ) # if hasattr(type_, "__slots__"): diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index 9ad151bfc..eae858247 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -18,7 +18,7 @@ import pytest from langchain_core.runnables import chain as as_runnable -from langsmith import Client, aevaluate, evaluate +from langsmith import Client, EvaluationResult, aevaluate, evaluate from langsmith import schemas as ls_schemas from langsmith.evaluation._runner import _include_attachments from langsmith.evaluation.evaluator import ( @@ -978,6 +978,10 @@ def summary_eval_outputs_reference(outputs, reference_outputs): return min([len(x["response"]) for x in outputs]) +def summary_eval_outputs_reference(evaluation_results): + return len(evaluation_results) + + @pytest.mark.parametrize( "evaluator", [ @@ -1004,7 +1008,8 @@ def test__normalize_summary_evaluator(evaluator: Callable) -> None: inputs={"in": "b" * 12}, ) ] - assert normalized(runs, examples)["score"] == 12 + evaluation_results = [EvaluationResult(key="foo", score=1)] * 12 + assert normalized(runs, examples, evaluation_results)["score"] == 12 def summary_eval_kwargs(*, runs, examples):