diff --git a/.github/actions/python-integration-tests/action.yml b/.github/actions/python-integration-tests/action.yml index d806fc979..487df1a28 100644 --- a/.github/actions/python-integration-tests/action.yml +++ b/.github/actions/python-integration-tests/action.yml @@ -52,7 +52,19 @@ runs: LANGCHAIN_API_KEY: ${{ inputs.langchain-api-key }} OPENAI_API_KEY: ${{ inputs.openai-api-key }} ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }} - LANGCHAIN_TEST_CACHE: "tests/cassettes" run: make doctest shell: bash working-directory: python + + + - name: Run Evaluation + env: + LANGCHAIN_TRACING_V2: "true" + LANGCHAIN_API_KEY: ${{ inputs.langchain-api-key }} + OPENAI_API_KEY: ${{ inputs.openai-api-key }} + ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }} + LANGCHAIN_TEST_CACHE: "tests/cassettes" + run: make evals + shell: bash + working-directory: python + diff --git a/python/Makefile b/python/Makefile index 6586c6d47..9f94856f6 100644 --- a/python/Makefile +++ b/python/Makefile @@ -1,4 +1,4 @@ -.PHONY: tests lint format +.PHONY: tests lint format build publish doctest integration_tests integration_tests_fast evals tests: poetry run pytest -n auto --durations=10 tests/unit_tests @@ -15,6 +15,9 @@ integration_tests_fast: doctest: poetry run pytest -n auto --durations=10 --doctest-modules langsmith +evals: + poetry run pytest -n auto tests/evaluation + lint: poetry run ruff check . poetry run mypy . 
diff --git a/python/langsmith/__init__.py b/python/langsmith/__init__.py index 81dfec62e..b42450cdf 100644 --- a/python/langsmith/__init__.py +++ b/python/langsmith/__init__.py @@ -9,6 +9,7 @@ from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator from langsmith.run_helpers import trace, traceable from langsmith.run_trees import RunTree + from langsmith.evaluation import evaluate, aevaluate def __getattr__(name: str) -> Any: @@ -52,6 +53,14 @@ def __getattr__(name: str) -> Any: from langsmith._expect import expect return expect + elif name == "evaluate": + from langsmith.evaluation import evaluate + + return evaluate + elif name == "aevaluate": + from langsmith.evaluation import aevaluate + + return aevaluate raise AttributeError(f"module {__name__!r} has no attribute {name!r}") @@ -66,4 +75,6 @@ def __getattr__(name: str) -> Any: "trace", "unit", "expect", + "evaluate", + "aevaluate", ]