[Python] s/unit/test/g (#766)
Most of the time, people use this decorator for integration-like testing.
Better to keep it generic as `@test`.

We'll keep `@unit` around for backwards compat
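After this change, the decorator is imported as `test`. A minimal sketch of the new spelling (mirroring the docstring examples in `_testing.py` below; the function name and assertions are illustrative):

```python
from langsmith import expect, test


@test
def test_addition():
    # Plain asserts work as usual; the run is also logged to LangSmith.
    assert 3 + 4 == 7
    # Optionally log scored "expectation" feedback alongside the assertion.
    expect(3 + 4).to_equal(7)
```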
hinthornw authored Jun 6, 2024
1 parent 8bb007c commit 88ddd2a
Showing 5 changed files with 47 additions and 36 deletions.
15 changes: 11 additions & 4 deletions python/langsmith/__init__.py
@@ -4,7 +4,7 @@

if TYPE_CHECKING:
from langsmith._expect import expect
from langsmith._testing import unit
from langsmith._testing import test, unit
from langsmith.client import Client
from langsmith.evaluation import aevaluate, evaluate
from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator
@@ -44,10 +44,11 @@ def __getattr__(name: str) -> Any:
from langsmith.run_helpers import traceable

return traceable
elif name == "unit":
from langsmith._testing import unit

return unit
elif name == "test":
from langsmith._testing import test

return test

elif name == "expect":
from langsmith._expect import expect
@@ -62,6 +63,11 @@ def __getattr__(name: str) -> Any:

return aevaluate

elif name == "unit":
from langsmith._testing import unit

return unit

raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


@@ -74,6 +80,7 @@ def __getattr__(name: str) -> Any:
"traceable",
"trace",
"unit",
"test",
"expect",
"evaluate",
"aevaluate",
8 changes: 4 additions & 4 deletions python/langsmith/_expect.py
@@ -1,14 +1,14 @@
"""Make approximate assertions as "expectations" on test results.
This module is designed to be used within test cases decorated with the `@unit` decorator
This module is designed to be used within test cases decorated with the `@test` decorator
It allows you to log scores about a test case and optionally make assertions that log as
"expectation" feedback to LangSmith.
Example usage:
from langsmith import expect, unit
from langsmith import expect, test
@unit
@test
def test_output_semantically_close():
response = oai_client.chat.completions.create(
model="gpt-3.5-turbo",
@@ -37,7 +37,7 @@ def test_output_semantically_close():
# Or using a custom check
expect.value(response_txt).against(lambda x: "Hello" in x)
# You can even use this for basic metric logging within unit tests
# You can even use this for basic metric logging within tests
expect.score(0.8)
expect.score(0.7, key="similarity").to_be_greater_than(0.7)
36 changes: 20 additions & 16 deletions python/langsmith/_testing.py
@@ -40,13 +40,13 @@ class SkipException(Exception): # type: ignore[no-redef]


@overload
def unit(
def test(
func: Callable,
) -> Callable: ...


@overload
def unit(
def test(
*,
id: Optional[uuid.UUID] = None,
output_keys: Optional[Sequence[str]] = None,
@@ -55,8 +55,8 @@ def unit(
) -> Callable[[Callable], Callable]: ...


def unit(*args: Any, **kwargs: Any) -> Callable:
"""Create a unit test case in LangSmith.
def test(*args: Any, **kwargs: Any) -> Callable:
"""Create a test case in LangSmith.
This decorator is used to mark a function as a test case for LangSmith. It ensures
that the necessary example data is created and associated with the test function.
@@ -90,9 +90,9 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
without re-executing the code. Requires the 'langsmith[vcr]' package.
Example:
For basic usage, simply decorate a test function with `@unit`:
For basic usage, simply decorate a test function with `@test`:
>>> @unit
>>> @test
... def test_addition():
... assert 3 + 4 == 7
@@ -106,7 +106,7 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
... def generate_numbers():
... return 3, 4
>>> @unit
>>> @test
... def test_nested():
... # Traced code will be included in the test case
... a, b = generate_numbers()
@@ -128,7 +128,7 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
>>> import openai
>>> from langsmith.wrappers import wrap_openai
>>> oai_client = wrap_openai(openai.Client())
>>> @unit
>>> @test
... def test_openai_says_hello():
... # Traced code will be included in the test case
... response = oai_client.chat.completions.create(
@@ -144,7 +144,7 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
`expect` to score and make approximate assertions on your results.
>>> from langsmith import expect
>>> @unit
>>> @test
... def test_output_semantically_close():
... response = oai_client.chat.completions.create(
... model="gpt-3.5-turbo",
@@ -168,23 +168,23 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
... # And then log a pass/fail score to LangSmith
... ).to_be_less_than(1.0)
The `@unit` decorator works natively with pytest fixtures.
The `@test` decorator works natively with pytest fixtures.
The values will populate the "inputs" of the corresponding example in LangSmith.
>>> import pytest
>>> @pytest.fixture
... def some_input():
... return "Some input"
>>>
>>> @unit
>>> @test
... def test_with_fixture(some_input: str):
... assert "input" in some_input
>>>
You can still use pytest.parametrize() as usual to run multiple test cases
using the same test function.
>>> @unit(output_keys=["expected"])
>>> @test(output_keys=["expected"])
... @pytest.mark.parametrize(
... "a, b, expected",
... [
@@ -198,18 +198,18 @@ def unit(*args: Any, **kwargs: Any) -> Callable:
By default, each test case will be assigned a consistent, unique identifier
based on the function name and module. You can also provide a custom identifier
using the `id` argument:
>>> @unit(id="1a77e4b5-1d38-4081-b829-b0442cf3f145")
>>> @test(id="1a77e4b5-1d38-4081-b829-b0442cf3f145")
... def test_multiplication():
... assert 3 * 4 == 12
By default, all unit test inputs are saved as "inputs" to a dataset.
By default, all test inputs are saved as "inputs" to a dataset.
You can specify the `output_keys` argument to persist those keys
within the dataset's "outputs" fields.
>>> @pytest.fixture
... def expected_output():
... return "input"
>>> @unit(output_keys=["expected_output"])
>>> @test(output_keys=["expected_output"])
... def test_with_expected_output(some_input: str, expected_output: str):
... assert expected_output in some_input
@@ -299,7 +299,7 @@ def _get_test_suite(
return client.read_dataset(dataset_name=test_suite_name)
else:
repo = ls_env.get_git_info().get("remote_url") or ""
description = "Unit test suite"
description = "Test suite"
if repo:
description += f" for {repo}"
return client.create_dataset(
@@ -675,3 +675,7 @@ async def _test():
cache_path, ignore_hosts=[test_suite.client.api_url]
):
await _test()


# For backwards compatibility
unit = test
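For existing suites, a small sketch of how the backwards-compatibility alias above behaves (the decorated function is illustrative):

```python
from langsmith import test, unit

# `unit` is now just an alias for `test` (see `unit = test` above),
# so existing imports and decorators keep working unchanged.
assert unit is test


@unit
def test_legacy_addition():
    assert 1 + 1 == 2
```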
20 changes: 10 additions & 10 deletions python/tests/evaluation/test_evaluation.py
@@ -3,7 +3,7 @@

import pytest

from langsmith import Client, aevaluate, evaluate, expect, unit
from langsmith import Client, aevaluate, evaluate, expect, test
from langsmith.schemas import Example, Run


@@ -95,7 +95,7 @@ async def apredict(inputs: dict) -> dict:
assert count == 2


@unit
@test
def test_foo():
expect(3 + 4).to_equal(7)

@@ -110,53 +110,53 @@ def expected_output():
return "input"


@unit(output_keys=["expected_output"])
@test(output_keys=["expected_output"])
def test_bar(some_input: str, expected_output: str):
expect(some_input).to_contain(expected_output)


@unit
@test
async def test_baz():
await asyncio.sleep(0.1)
expect(3 + 4).to_equal(7)
return 7


@unit
@test
@pytest.mark.parametrize("x, y", [(1, 2), (2, 3)])
def test_foo_parametrized(x, y):
expect(x + y).to_be_greater_than(0)
return x + y


@unit(output_keys=["z"])
@test(output_keys=["z"])
@pytest.mark.parametrize("x, y, z", [(1, 2, 3), (2, 3, 5)])
def test_bar_parametrized(x, y, z):
expect(x + y).to_equal(z)
return {"z": x + y}


@unit(test_suite_name="tests.evaluation.test_evaluation::test_foo_async_parametrized")
@test(test_suite_name="tests.evaluation.test_evaluation::test_foo_async_parametrized")
@pytest.mark.parametrize("x, y", [(1, 2), (2, 3)])
async def test_foo_async_parametrized(x, y):
await asyncio.sleep(0.1)
expect(x + y).to_be_greater_than(0)
return x + y


@unit(output_keys=["z"])
@test(output_keys=["z"])
@pytest.mark.parametrize("x, y, z", [(1, 2, 3), (2, 3, 5)])
async def test_bar_async_parametrized(x, y, z):
await asyncio.sleep(0.1)
expect(x + y).to_equal(z)
return {"z": x + y}


@unit
@test
def test_pytest_skip():
pytest.skip("Skip this test")


@unit
@test
async def test_async_pytest_skip():
pytest.skip("Skip this test")
4 changes: 2 additions & 2 deletions python/tests/external/test_instructor_evals.py
@@ -8,7 +8,7 @@
from openai import AsyncOpenAI
from pydantic import BaseModel

from langsmith import unit
from langsmith import test


class Models(str, Enum):
@@ -58,7 +58,7 @@ class ClassifySpam(BaseModel):


@pytest.mark.asyncio_cooperative
@unit()
@test()
@pytest.mark.parametrize("client, data", d[:3])
async def test_classification(client, data):
input, expected = data
