From f5624712bc73ceb04aee70b211b59e7ca1c1594d Mon Sep 17 00:00:00 2001
From: William FH <13333726+hinthornw@users.noreply.github.com>
Date: Wed, 9 Oct 2024 17:20:52 -0700
Subject: [PATCH] Better async error message (#1080)

---
 python/langsmith/evaluation/_runner.py        | 12 +++++++
 .../unit_tests/evaluation/test_runner.py      | 32 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index a040ea7a3..d076869cc 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -253,6 +253,18 @@ def evaluate(
         ... )  # doctest: +ELLIPSIS
         View the evaluation results for experiment:...
     """  # noqa: E501
+    if callable(target) and rh.is_async(target):
+        raise ValueError(
+            "Async functions are not supported by `evaluate`. "
+            "Please use `aevaluate` instead:\n\n"
+            "from langsmith import aevaluate\n\n"
+            "await aevaluate(\n"
+            "    async_target_function,\n"
+            "    data=data,\n"
+            "    evaluators=evaluators,\n"
+            "    # ... other parameters\n"
+            ")"
+        )
     if experiment and experiment_prefix:
         raise ValueError(
             "Expected at most one of 'experiment' or 'experiment_prefix',"
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 1229590c9..d20960d3e 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -1,6 +1,7 @@
 """Test the eval runner."""
 
 import asyncio
+import functools
 import itertools
 import json
 import random
@@ -248,6 +249,37 @@ def score_value(run, example):
     assert not fake_request.should_fail
 
 
+def test_evaluate_raises_for_async():  # evaluate() must reject async targets
+    async def my_func(inputs: dict):
+        pass
+
+    match = "Async functions are not supported by"  # regex for pytest.raises
+    with pytest.raises(ValueError, match=match):  # plain coroutine function
+        evaluate(my_func, data="foo")
+
+    async def my_other_func(inputs: dict, other_val: int):
+        pass
+
+    with pytest.raises(ValueError, match=match):  # partial of a coroutine function
+        evaluate(functools.partial(my_other_func, other_val=3), data="foo")
+
+    try:  # the remaining cases need langchain-core's RunnableLambda
+        from langchain_core.runnables import RunnableLambda
+    except ImportError:
+        pytest.skip("langchain-core not installed.")
+
+    @RunnableLambda
+    def foo(inputs: dict):
+        return "bar"
+
+    with pytest.raises(ValueError, match=match):  # bound `ainvoke` of a Runnable
+        evaluate(foo.ainvoke, data="foo")
+    if sys.version_info < (3, 10):  # last case is only exercised on 3.10+
+        return
+    with pytest.raises(ValueError, match=match):  # partial of bound `ainvoke`
+        evaluate(functools.partial(foo.ainvoke, inputs={"foo": "bar"}), data="foo")
+
+
 @pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
 @pytest.mark.parametrize("blocking", [False, True])
 async def test_aevaluate_results(blocking: bool) -> None: