diff --git a/.github/actions/python-integration-tests/action.yml b/.github/actions/python-integration-tests/action.yml
index d806fc979..487df1a28 100644
--- a/.github/actions/python-integration-tests/action.yml
+++ b/.github/actions/python-integration-tests/action.yml
@@ -52,7 +52,19 @@ runs:
         LANGCHAIN_API_KEY: ${{ inputs.langchain-api-key }}
         OPENAI_API_KEY: ${{ inputs.openai-api-key }}
         ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }}
-        LANGCHAIN_TEST_CACHE: "tests/cassettes"
       run: make doctest
       shell: bash
       working-directory: python
+    
+
+    - name: Run Evaluation
+      env:
+        LANGCHAIN_TRACING_V2: "true"
+        LANGCHAIN_API_KEY: ${{ inputs.langchain-api-key }}
+        OPENAI_API_KEY: ${{ inputs.openai-api-key }}
+        ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }}
+        LANGCHAIN_TEST_CACHE: "tests/cassettes"
+      run: make evals  # Makefile `evals` target (pytest tests/evaluation), not a second doctest run
+      shell: bash
+      working-directory: python
+
diff --git a/python/Makefile b/python/Makefile
index 6586c6d47..9f94856f6 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -1,4 +1,4 @@
-.PHONY: tests lint format
+.PHONY: tests lint format build publish doctest integration_tests integration_tests_fast evals
 
 tests:
 	poetry run pytest -n auto --durations=10 tests/unit_tests
@@ -15,6 +15,9 @@ integration_tests_fast:
 doctest:
 	poetry run pytest -n auto --durations=10 --doctest-modules langsmith
 
+evals:  # Run the pytest suite under tests/evaluation via poetry.
+	poetry run pytest -n auto tests/evaluation
+
 lint:
 	poetry run ruff check .
 	poetry run mypy .
diff --git a/python/langsmith/__init__.py b/python/langsmith/__init__.py
index 81dfec62e..b42450cdf 100644
--- a/python/langsmith/__init__.py
+++ b/python/langsmith/__init__.py
@@ -9,6 +9,7 @@
     from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator
     from langsmith.run_helpers import trace, traceable
     from langsmith.run_trees import RunTree
+    from langsmith.evaluation import evaluate, aevaluate
 
 
 def __getattr__(name: str) -> Any:
@@ -52,6 +53,14 @@ def __getattr__(name: str) -> Any:
         from langsmith._expect import expect
 
         return expect
+    elif name == "evaluate":
+        from langsmith.evaluation import evaluate
+
+        return evaluate
+    elif name == "aevaluate":
+        from langsmith.evaluation import aevaluate
+
+        return aevaluate
 
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
@@ -66,4 +75,6 @@ def __getattr__(name: str) -> Any:
     "trace",
     "unit",
     "expect",
+    "evaluate",
+    "aevaluate",
 ]