[Python] Dynamic async evaluator for langchain off-the-shelf (#1001)

langchain-ai · Sep 12, 2024 · 30365bc
1 parent 9044996
commit 30365bc
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 3 deletions.
diff --git a/python/langsmith/evaluation/integrations/_langchain.py b/python/langsmith/evaluation/integrations/_langchain.py
@@ -2,7 +2,7 @@
 
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union
 
-from langsmith.evaluation.evaluator import run_evaluator
+from langsmith.evaluation.evaluator import DynamicRunEvaluator
 from langsmith.run_helpers import traceable
 from langsmith.schemas import Example, Run
 
@@ -260,4 +260,14 @@ def evaluate(run: Run, example: Optional[Example] = None) -> dict:
             results = self.evaluator.evaluate_strings(**eval_inputs)
             return {"key": self.evaluator.evaluation_name, **results}
 
-        return run_evaluator(evaluate)
+        @traceable(name=self.evaluator.evaluation_name)
+        async def aevaluate(run: Run, example: Optional[Example] = None) -> dict:
+            eval_inputs = (
+                prepare_evaluator_inputs(run, example)
+                if self._prepare_data is None
+                else self._prepare_data(run, example)
+            )
+            results = await self.evaluator.aevaluate_strings(**eval_inputs)
+            return {"key": self.evaluator.evaluation_name, **results}
+
+        return DynamicRunEvaluator(evaluate, aevaluate)
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.1.118"
+version = "0.1.119"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = ["LangChain <support@langchain.dev>"]
 license = "MIT"

diff --git a/python/tests/unit_tests/evaluation/test_evaluator.py b/python/tests/unit_tests/evaluation/test_evaluator.py
@@ -1,6 +1,7 @@
 import asyncio
 import logging
 from typing import Any, Optional
+from unittest import mock
 from unittest.mock import MagicMock
 
 import pytest
@@ -14,6 +15,7 @@
     Run,
     run_evaluator,
 )
+from langsmith.evaluation.integrations._langchain import LangChainStringEvaluator
 from langsmith.run_helpers import tracing_context
 
 
@@ -360,3 +362,16 @@ def test_check_value_non_numeric(caplog):
         "Numeric values should be provided in the 'score' field, not 'value'."
         not in caplog.text
     )
+
+
+def test_langchain_run_evaluator_native_async():
+    try:
+        from langchain.evaluation import load_evaluator  # noqa
+    except ImportError:
+        pytest.skip("Skipping test that requires langchain")
+
+    with mock.patch.dict("os.environ", {"OPENAI_API_KEY": "fake_api_key"}):
+        res = LangChainStringEvaluator(evaluator="qa")
+    run_evaluator = res.as_run_evaluator()
+    assert hasattr(run_evaluator, "afunc")
+    assert hasattr(run_evaluator, "func")