test fix

langchain-ai · Dec 9, 2024 · 23187f1
1 parent b524f72
commit 23187f1
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 17 deletions.
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
@@ -3843,10 +3843,9 @@ def read_example(
                     "presigned_url": value["presigned_url"],
                     "reader": reader,
                 }
-        del example["attachment_urls"]
 
         return ls_schemas.Example(
-            **example,
+            **{k: v for k, v in example.items() if k != "attachment_urls"},
             attachments=attachments,
             _host_url=self._host_url,
             _tenant_id=self._get_optional_tenant_id(),
@@ -3930,10 +3929,9 @@ def list_examples(
                         "presigned_url": value["presigned_url"],
                         "reader": reader,
                     }
-            del example["attachment_urls"]
 
             yield ls_schemas.Example(
-                **example,
+                **{k: v for k, v in example.items() if k != "attachment_urls"},
                 attachments=attachments,
                 _host_url=self._host_url,
                 _tenant_id=self._get_optional_tenant_id(),

diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
@@ -11,7 +11,7 @@
 import uuid
 from datetime import datetime, timezone
 from threading import Lock
-from typing import Callable, List
+from typing import Any, Callable, Dict, List, Tuple
 from unittest import mock
 from unittest.mock import MagicMock
 
@@ -53,7 +53,9 @@ def request(self, verb: str, endpoint: str, *args, **kwargs):
                 return res
             elif endpoint == "http://localhost:1984/examples":
                 res = MagicMock()
-                res.json.return_value = [e.dict() for e in self.ds_examples]
+                res.json.return_value = [
+                    e.dict() if not isinstance(e, dict) else e for e in self.ds_examples
+                ]
                 return res
             elif endpoint == "http://localhost:1984/sessions":
                 res = {}  # type: ignore
@@ -143,14 +145,23 @@ def _wait_until(condition: Callable, timeout: int = 8):
     raise TimeoutError("Condition not met")
 
 
-def _create_example(idx: int) -> ls_schemas.Example:
+def _create_example(idx: int) -> Tuple[ls_schemas.Example, Dict[str, Any]]:
+    _id = uuid.uuid4()
+    _created_at = datetime.now(timezone.utc)
     return ls_schemas.Example(
-        id=uuid.uuid4(),
+        id=_id,
         inputs={"in": idx},
         outputs={"answer": idx + 1},
         dataset_id="00886375-eb2a-4038-9032-efff60309896",
-        created_at=datetime.now(timezone.utc),
-    )
+        created_at=_created_at,
+    ), {
+        "id": _id,
+        "dataset_id": "00886375-eb2a-4038-9032-efff60309896",
+        "created_at": _created_at,
+        "inputs": {"in": idx},
+        "outputs": {"answer": idx + 1},
+        "attachment_urls": None,
+    }
 
 
 @pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
@@ -166,10 +177,13 @@ def test_evaluate_results(
 
     SPLIT_SIZE = 3
     NUM_REPETITIONS = 4
-    ds_examples = [_create_example(i) for i in range(10)]
+    ds_example_responses = [_create_example(i) for i in range(10)]
+    ds_examples = [e[0] for e in ds_example_responses]
     dev_split = random.sample(ds_examples, SPLIT_SIZE)
     tenant_id = str(uuid.uuid4())
-    fake_request = FakeRequest(ds_id, ds_name, ds_examples, tenant_id)
+    fake_request = FakeRequest(
+        ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id
+    )
     session.request = fake_request.request
     client = Client(
         api_url="http://localhost:1984",
@@ -393,7 +407,12 @@ def eval2(x, y, inputs):
             _normalize_evaluator_func(eval_)
 
         with pytest.raises(ValueError, match="Invalid evaluator function."):
-            evaluate((lambda x: x), data=ds_examples, evaluators=[eval_], client=client)
+            evaluate(
+                (lambda inputs: inputs),
+                data=ds_examples,
+                evaluators=[eval_],
+                client=client,
+            )
 
 
 def test_evaluate_raises_for_async():
@@ -437,10 +456,13 @@ async def test_aevaluate_results(
 
     SPLIT_SIZE = 3
     NUM_REPETITIONS = 4
-    ds_examples = [_create_example(i) for i in range(10)]
+    ds_example_responses = [_create_example(i) for i in range(10)]
+    ds_examples = [e[0] for e in ds_example_responses]
     dev_split = random.sample(ds_examples, SPLIT_SIZE)
     tenant_id = str(uuid.uuid4())
-    fake_request = FakeRequest(ds_id, ds_name, ds_examples, tenant_id)
+    fake_request = FakeRequest(
+        ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id
+    )
     session.request = fake_request.request
     client = Client(
         api_url="http://localhost:1984",
@@ -664,8 +686,8 @@ async def eval2(x, y, inputs):
 
     evaluators = [eval1, eval2]
 
-    async def atarget(x):
-        return x
+    async def atarget(inputs):
+        return inputs
 
     for eval_ in evaluators:
         with pytest.raises(ValueError, match="Invalid evaluator function."):