langchain-ai · hinthornw · Apr 10, 2024 · Apr 9, 2024 · Apr 9, 2024 · Apr 10, 2024
diff --git a/python/langsmith/_testing.py b/python/langsmith/_testing.py
@@ -143,19 +143,42 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
         ...     assert expected_output in some_input
 
 
+        LLM calls can be expensive and slow! To speed up your tests, you can cache
+        the HTTP requests they make by passing `cache=True`, or by setting the
+        `LANGCHAIN_TEST_CACHE` environment variable to "true". Note that caching
+        requires that you install langsmith with the `vcr` extra:
+
+        pip install -U "langsmith[vcr]"
+
+        >>> @unit(cache=True)
+        ... def test_openai_generates_numbers():
+        ...     oai_client = wrap_openai(openai.Client())
+        ...     response = oai_client.chat.completions.create(
+        ...         model="gpt-3.5-turbo",
+        ...         messages=[
+        ...             {"role": "system", "content": "You are a helpful assistant."},
+        ...             {"role": "user", "content": "Generate a random number."},
+        ...         ],
+        ...     )
+        ...     content = response.choices[0].message.content
+        ...     assert any(char.isdigit() for char in content)
+
+
         To run these tests, use the pytest CLI. Or directly run the test functions.
         >>> test_addition()
         >>> test_with_fixture("Some input")
         >>> test_with_expected_output("Some input", "Some")
         >>> test_multiplication()
         >>> test_openai_says_hello()
         >>> test_addition_with_multiple_inputs(1, 2, 3)
+        >>> test_openai_generates_numbers()
     """
     langtest_extra = _UTExtra(
         id=kwargs.pop("id", None),
         output_keys=kwargs.pop("output_keys", None),
         client=kwargs.pop("client", None),
         test_suite_name=kwargs.pop("test_suite_name", None),
+        cache=_get_cache(kwargs.pop("cache", None)),
     )
     if kwargs:
         warnings.warn(f"Unexpected keyword arguments: {kwargs.keys()}")
@@ -188,7 +211,7 @@ def wrapper(*test_args, **test_kwargs):
 
 def _get_experiment_name() -> str:
     # TODO Make more easily configurable
-    prefix = ls_utils.get_tracer_project(False) or "TestSuite"
+    prefix = ls_utils.get_tracer_project(False) or "TestSuiteResult"
     name = f"{prefix}:{uuid.uuid4().hex[:8]}"
     return name
 
@@ -199,16 +222,29 @@ def _get_test_suite_name() -> str:
     if test_suite_name:
         return test_suite_name
     if __package__:
-        return __package__
+        return __package__ + " Test Suite"
     git_info = ls_env.get_git_info()
     if git_info:
         if git_info["remote_url"]:
             repo_name = git_info["remote_url"].split("/")[-1].split(".")[0]
             if repo_name:
-                return repo_name
+                return repo_name + " Test Suite"
     raise ValueError("Please set the LANGCHAIN_TEST_SUITE environment variable.")
 
 
+def _get_cache(do_cache: Optional[bool]) -> bool:
+    """Resolve whether request caching is enabled for a test.
+
+    An explicit ``do_cache`` value always wins. When it is ``None``, fall back
+    to the ``LANGCHAIN_TEST_CACHE`` environment variable, which enables caching
+    when set to "true" (case-insensitive, surrounding whitespace ignored).
+    """
+    if do_cache is not None:
+        return do_cache
+    env_value = os.environ.get("LANGCHAIN_TEST_CACHE", "false")
+    return env_value.strip().lower() == "true"
+
+
 def _get_test_suite(client: ls_client.Client) -> ls_schemas.Dataset:
     test_suite_name = _get_test_suite_name()
 
@@ -338,6 +367,7 @@ class _UTExtra(TypedDict, total=False):
     id: Optional[uuid.UUID]
     output_keys: Optional[Sequence[str]]
     test_suite_name: Optional[str]
+    cache: Optional[bool]
 
 
 def _ensure_example(
@@ -367,21 +397,53 @@ def _run_test(func, *test_args, langtest_extra: _UTExtra, **test_kwargs):
     )
     run_id = uuid.uuid4()
 
-    try:
-        func_ = func if rh.is_traceable_function(func) else rh.traceable(func)
-        func_(
-            *test_args,
-            **test_kwargs,
-            langsmith_extra={
-                "run_id": run_id,
-                "reference_example_id": example_id,
-                "project_name": test_suite.name,
-            },
+    def _test():
+        """Run the wrapped test once and submit its outcome to the test suite."""
+        try:
+            func_ = func if rh.is_traceable_function(func) else rh.traceable(func)
+            func_(
+                *test_args,
+                **test_kwargs,
+                langsmith_extra={
+                    "run_id": run_id,
+                    "reference_example_id": example_id,
+                    "project_name": test_suite.name,
+                },
+            )
+        except BaseException as e:
+            test_suite.submit_result(run_id, error=repr(e))
+            raise e
+        try:
+            test_suite.submit_result(run_id, error=None)
+        except BaseException as e:
+            logger.warning(f"Failed to create feedback for run_id {run_id}: {e}")
+
+    if langtest_extra["cache"] is True:
+        try:
+            import vcr  # type: ignore[import-untyped]
+        except ImportError:
+            raise ImportError(
+                "vcrpy is required to use caching. Install with: "
+                'pip install -U "langsmith[vcr]"'
+            )
+        from urllib.parse import urlparse
+
+        cache_dir = os.path.expanduser("~/.cache/langsmith/cassettes")
+        # vcrpy compares `ignore_hosts` entries against the request's hostname,
+        # so pass the host of the LangSmith API rather than its full URL;
+        # a full URL never matches and the API calls would not be excluded.
+        api_host = urlparse(test_suite.client.api_url).hostname
+        ls_vcr = vcr.VCR(
+            serializer="yaml",
+            cassette_library_dir=cache_dir,
+            ignore_hosts=[api_host] if api_host else [],
+            # Replay previous requests, record new ones
+            # TODO: Support other modes
+            record_mode="new_episodes",
+            match_on=["uri", "method", "path", "body"],
         )
-    except BaseException as e:
-        test_suite.submit_result(run_id, error=repr(e))
-        raise e
-    try:
-        test_suite.submit_result(run_id, error=None)
-    except BaseException as e:
-        logger.warning(f"Failed to create feedback for run_id {run_id}: {e}")
+
+        with ls_vcr.use_cassette(f"{test_suite.id}.yaml"):
+            _test()
+    else:
+        _test()
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
@@ -611,7 +611,9 @@ def info(self) -> ls_schemas.LangSmithInfo:
                 ls_utils.raise_for_status_with_text(response)
                 self._info = ls_schemas.LangSmithInfo(**response.json())
             except BaseException as e:
-                logger.warning(f"Failed to get info from {self.api_url}: {repr(e)}")
+                logger.warning(
+                    f"Failed to get info from {self.api_url}: {repr(e)}", exc_info=True
+                )
                 self._info = ls_schemas.LangSmithInfo()
         return self._info
 

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
@@ -806,7 +806,11 @@ def _run_evaluators(
         current_context = rh.get_tracing_context()
         metadata = {
             **(current_context["metadata"] or {}),
-            **{"experiment": self.experiment_name},
+            **{
+                "experiment": self.experiment_name,
+                "reference_example_id": current_results["example"].id,
+                "reference_run_id": current_results["run"].id,
+            },
         }
         with rh.tracing_context(
             **{**current_context, "project_name": "evaluators", "metadata": metadata}

diff --git a/python/poetry.lock b/python/poetry.lock
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.1.42"
+version = "0.1.41"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = ["LangChain <[email protected]>"]
 license = "MIT"
@@ -30,7 +30,6 @@ pydantic = ">=1,<3"
 requests = "^2"
 orjson = "^3.9.14"
 
-
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.3.1"
 black = ">=23.3,<25.0"
@@ -54,6 +53,8 @@ types-tqdm = "^4.66.0.20240106"
 [tool.poetry.group.lint.dependencies]
 openai = "^1.10"
 
+[tool.poetry.extras]
+vcr = ["vcrpy"]
 
 [build-system]
 requires = ["poetry-core"]