[Python] s/unit/test/g (#766)
Most of the time, people use this decorator for integration-like testing.
Better to keep it generic as `@test`.

We'll keep `@unit` around for backwards compat
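After this change, the decorator is imported as `test`. A minimal sketch of the new spelling (mirroring the docstring examples in `_testing.py` below; the function name and assertions are illustrative):

```python
from langsmith import expect, test


@test
def test_addition():
    # Plain asserts work as usual; the run is also logged to LangSmith.
    assert 3 + 4 == 7
    # Optionally log scored "expectation" feedback alongside the assertion.
    expect(3 + 4).to_equal(7)
```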
hinthornw authored Jun 6, 2024
1 parent 8bb007c commit 88ddd2a
Showing 5 changed files with 47 additions and 36 deletions.
15 changes: 11 additions & 4 deletions python/langsmith/__init__.py
@@ -4,7 +4,7 @@

if TYPE_CHECKING:
from langsmith._expect import expect
from langsmith._testing import unit
from langsmith._testing import test, unit
from langsmith.client import Client
from langsmith.evaluation import aevaluate, evaluate
from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator
@@ -44,10 +44,11 @@ def __getattr__(name: str) -> Any:
from langsmith.run_helpers import traceable

return traceable
elif name == "unit":
from langsmith._testing import unit

return unit
elif name == "test":
from langsmith._testing import test

return test

elif name == "expect":
from langsmith._expect import expect
@@ -62,6 +63,11 @@ def __getattr__(name: str) -> Any:

return aevaluate

elif name == "unit":
from langsmith._testing import unit

return unit

raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


@@ -74,6 +80,7 @@ def __getattr__(name: str) -> Any:
"traceable",
"trace",
"unit",
"test",
"expect",
"evaluate",
"aevaluate",
8 changes: 4 additions & 4 deletions python/langsmith/_expect.py
@@ -1,14 +1,14 @@
"""Make approximate assertions as "expectations" on test results.
This module is designed to be used within test cases decorated with the `@unit` decorator
This module is designed to be used within test cases decorated with the `@test` decorator
It allows you to log scores about a test case and optionally make assertions that log as
"expectation" feedback to LangSmith.
Example usage:
from langsmith import expect, unit
from langsmith import expect, test
@unit
@test
def test_output_semantically_close():
response = oai_client.chat.completions.create(
model="gpt-3.5-turbo",
@@ -37,7 +37,7 @@ def test_output_semantically_close():
# Or using a custom check
expect.value(response_txt).against(lambda x: "Hello" in x)
# You can even use this for basic metric logging within unit tests
# You can even use this for basic metric logging within tests
expect.score(0.8)
expect.score(0.7, key="similarity").to_be_greater_than(0.7)
36 changes: 20 additions & 16 deletions python/langsmith/_testing.py
@@ -40,13 +40,13 @@ class SkipException(Exception): # type: ignore[no-redef]


@overload
def unit(
def test(
func: Callable,
) -> Callable: ...


@overload
def unit(
def test(
*,
id: Optional[uuid.UUID] = None,
output_keys: Optional[Sequence[str]] = None,
@@ -55,8 +55,8 @@ def unit(
) -> Callable[[Callable], Callable]: ...


def unit(*args: Any, **kwargs: Any) -> Callable:
"""Create a unit test case in LangSmith.
def test(*args: Any, **kwargs: Any) -> Callable:
"""Create a test case in LangSmith.
This decorator is used to mark a function as a test case for LangSmith. It ensures
that the necessary example data is created and associated with the test function.
@@ -90,9 +90,9 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
without re-executing the code. Requires the 'langsmith[vcr]' package.
Example:
For basic usage, simply decorate a test function with `@unit`:
For basic usage, simply decorate a test function with `@test`:
>>> @unit
>>> @test
... def test_addition():
... assert 3 + 4 == 7
@@ -106,7 +106,7 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
... def generate_numbers():
... return 3, 4
>>> @unit
>>> @test
... def test_nested():
... # Traced code will be included in the test case
... a, b = generate_numbers()
@@ -128,7 +128,7 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
>>> import openai
>>> from langsmith.wrappers import wrap_openai
>>> oai_client = wrap_openai(openai.Client())
>>> @unit
>>> @test
... def test_openai_says_hello():
... # Traced code will be included in the test case
... response = oai_client.chat.completions.create(
@@ -144,7 +144,7 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
`expect` to score and make approximate assertions on your results.
>>> from langsmith import expect
>>> @unit
>>> @test
... def test_output_semantically_close():
... response = oai_client.chat.completions.create(
... model="gpt-3.5-turbo",
@@ -168,23 +168,23 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
... # And then log a pass/fail score to LangSmith
... ).to_be_less_than(1.0)
The `@unit` decorator works natively with pytest fixtures.
The `@test` decorator works natively with pytest fixtures.
The values will populate the "inputs" of the corresponding example in LangSmith.
>>> import pytest
>>> @pytest.fixture
... def some_input():
... return "Some input"
>>>
>>> @unit
>>> @test
... def test_with_fixture(some_input: str):
... assert "input" in some_input
>>>
You can still use pytest.parametrize() as usual to run multiple test cases
using the same test function.
>>> @unit(output_keys=["expected"])
>>> @test(output_keys=["expected"])
... @pytest.mark.parametrize(
... "a, b, expected",
... [
@@ -198,18 +198,18 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
By default, each test case will be assigned a consistent, unique identifier
based on the function name and module. You can also provide a custom identifier
using the `id` argument:
>>> @unit(id="1a77e4b5-1d38-4081-b829-b0442cf3f145")
>>> @test(id="1a77e4b5-1d38-4081-b829-b0442cf3f145")
... def test_multiplication():
... assert 3 * 4 == 12
By default, all unit test inputs are saved as "inputs" to a dataset.
By default, all test inputs are saved as "inputs" to a dataset.
You can specify the `output_keys` argument to persist those keys
within the dataset's "outputs" fields.
>>> @pytest.fixture
... def expected_output():
... return "input"
>>> @unit(output_keys=["expected_output"])
>>> @test(output_keys=["expected_output"])
... def test_with_expected_output(some_input: str, expected_output: str):
... assert expected_output in some_input
@@ -299,7 +299,7 @@ def _get_test_suite(
return client.read_dataset(dataset_name=test_suite_name)
else:
repo = ls_env.get_git_info().get("remote_url") or ""
description = "Unit test suite"
description = "Test suite"
if repo:
description += f" for {repo}"
return client.create_dataset(
@@ -675,3 +675,7 @@ async def _test():
cache_path, ignore_hosts=[test_suite.client.api_url]
):
await _test()


# For backwards compatibility
unit = test
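For existing suites, a small sketch of how the backwards-compatibility alias above behaves (the decorated function is illustrative):

```python
from langsmith import test, unit

# `unit` is now just an alias for `test` (see `unit = test` above),
# so existing imports and decorators keep working unchanged.
assert unit is test


@unit
def test_legacy_addition():
    assert 1 + 1 == 2
```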
20 changes: 10 additions & 10 deletions python/tests/evaluation/test_evaluation.py
@@ -3,7 +3,7 @@

import pytest

from langsmith import Client, aevaluate, evaluate, expect, unit
from langsmith import Client, aevaluate, evaluate, expect, test
from langsmith.schemas import Example, Run


@@ -95,7 +95,7 @@ async def apredict(inputs: dict) -> dict:
assert count == 2


@unit
@test
def test_foo():
expect(3 + 4).to_equal(7)

@@ -110,53 +110,53 @@ def expected_output():
return "input"


@unit(output_keys=["expected_output"])
@test(output_keys=["expected_output"])
def test_bar(some_input: str, expected_output: str):
expect(some_input).to_contain(expected_output)


@unit
@test
async def test_baz():
await asyncio.sleep(0.1)
expect(3 + 4).to_equal(7)
return 7


@unit
@test
@pytest.mark.parametrize("x, y", [(1, 2), (2, 3)])
def test_foo_parametrized(x, y):
expect(x + y).to_be_greater_than(0)
return x + y


@unit(output_keys=["z"])
@test(output_keys=["z"])
@pytest.mark.parametrize("x, y, z", [(1, 2, 3), (2, 3, 5)])
def test_bar_parametrized(x, y, z):
expect(x + y).to_equal(z)
return {"z": x + y}


@unit(test_suite_name="tests.evaluation.test_evaluation::test_foo_async_parametrized")
@test(test_suite_name="tests.evaluation.test_evaluation::test_foo_async_parametrized")
@pytest.mark.parametrize("x, y", [(1, 2), (2, 3)])
async def test_foo_async_parametrized(x, y):
await asyncio.sleep(0.1)
expect(x + y).to_be_greater_than(0)
return x + y


@unit(output_keys=["z"])
@test(output_keys=["z"])
@pytest.mark.parametrize("x, y, z", [(1, 2, 3), (2, 3, 5)])
async def test_bar_async_parametrized(x, y, z):
await asyncio.sleep(0.1)
expect(x + y).to_equal(z)
return {"z": x + y}


@unit
@test
def test_pytest_skip():
pytest.skip("Skip this test")


@unit
@test
async def test_async_pytest_skip():
pytest.skip("Skip this test")
4 changes: 2 additions & 2 deletions python/tests/external/test_instructor_evals.py
@@ -8,7 +8,7 @@
from openai import AsyncOpenAI
from pydantic import BaseModel

from langsmith import unit
from langsmith import test


class Models(str, Enum):
@@ -58,7 +58,7 @@ class ClassifySpam(BaseModel):


@pytest.mark.asyncio_cooperative
@unit()
@test()
@pytest.mark.parametrize("client, data", d[:3])
async def test_classification(client, data):
input, expected = data
