diff --git a/python/langsmith/evaluation/integrations/_langchain.py b/python/langsmith/evaluation/integrations/_langchain.py
index 9478ef653..3d4baa62f 100644
--- a/python/langsmith/evaluation/integrations/_langchain.py
+++ b/python/langsmith/evaluation/integrations/_langchain.py
@@ -2,7 +2,7 @@
 
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union
 
-from langsmith.evaluation.evaluator import run_evaluator
+from langsmith.evaluation.evaluator import DynamicRunEvaluator
 from langsmith.run_helpers import traceable
 from langsmith.schemas import Example, Run
 
@@ -260,4 +260,17 @@ def evaluate(run: Run, example: Optional[Example] = None) -> dict:
             results = self.evaluator.evaluate_strings(**eval_inputs)
             return {"key": self.evaluator.evaluation_name, **results}
 
-        return run_evaluator(evaluate)
+        @traceable(name=self.evaluator.evaluation_name)
+        async def aevaluate(run: Run, example: Optional[Example] = None) -> dict:
+            # Async variant: awaits the evaluator's ``aevaluate_strings``.
+            # A ``_prepare_data`` hook, when set, replaces the default
+            # input preparation.
+            if self._prepare_data is None:
+                inputs = prepare_evaluator_inputs(run, example)
+            else:
+                inputs = self._prepare_data(run, example)
+            outcome = await self.evaluator.aevaluate_strings(**inputs)
+            return {"key": self.evaluator.evaluation_name, **outcome}
+
+        # Hand both the sync and the async implementation to the wrapper.
+        return DynamicRunEvaluator(evaluate, aevaluate)
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 0f131fbfc..9713d17c2 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.1.118"
+version = "0.1.119"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain "]
 license = "MIT"
diff --git a/python/tests/unit_tests/evaluation/test_evaluator.py b/python/tests/unit_tests/evaluation/test_evaluator.py
index 72f6b2cb1..f0ec211eb 100644
--- a/python/tests/unit_tests/evaluation/test_evaluator.py
+++ b/python/tests/unit_tests/evaluation/test_evaluator.py
@@ -1,6 +1,7 @@
 import asyncio
 import logging
 from typing import Any, Optional
+from unittest import mock
 from unittest.mock import MagicMock
 
 import pytest
@@ -14,6 +15,7 @@
     Run,
     run_evaluator,
 )
+from langsmith.evaluation.integrations._langchain import LangChainStringEvaluator
 from langsmith.run_helpers import tracing_context
 
 
@@ -360,3 +362,20 @@ def test_check_value_non_numeric(caplog):
         "Numeric values should be provided in the 'score' field, not 'value'."
         not in caplog.text
     )
+
+
+def test_langchain_run_evaluator_native_async():
+    # Skip rather than fail when the optional langchain dependency is absent.
+    try:
+        from langchain.evaluation import load_evaluator  # noqa
+    except ImportError:
+        pytest.skip("Skipping test that requires langchain")
+
+    # Placeholder API key, patched into the environment only inside the block.
+    fake_env = {"OPENAI_API_KEY": "fake_api_key"}
+    with mock.patch.dict("os.environ", fake_env):
+        string_evaluator = LangChainStringEvaluator(evaluator="qa")
+        wrapped = string_evaluator.as_run_evaluator()
+        # The wrapper must expose both the async and the sync entry point.
+        for attr_name in ("afunc", "func"):
+            assert hasattr(wrapped, attr_name)