From 6c23581fc57b62971446999ce66e1bf3a96c803b Mon Sep 17 00:00:00 2001
From: Bagatur
Date: Fri, 22 Nov 2024 21:17:41 -0500
Subject: [PATCH] fmt

---
 .../manage_datasets_programmatically.mdx     |  6 ++---
 .../how_to_guides/evaluation/async.mdx       | 26 ++++++++-----------
 .../evaluation/builtin_evaluators.mdx        |  0
 .../evaluation/evaluate_llm_application.mdx  |  4 +--
 .../how_to_guides/evaluation/langgraph.mdx   | 10 +++----
 docs/evaluation/how_to_guides/index.md       |  2 +-
 6 files changed, 22 insertions(+), 26 deletions(-)
 delete mode 100644 docs/evaluation/how_to_guides/evaluation/builtin_evaluators.mdx

diff --git a/docs/evaluation/how_to_guides/datasets/manage_datasets_programmatically.mdx b/docs/evaluation/how_to_guides/datasets/manage_datasets_programmatically.mdx
index c6daa3be..08a9627a 100644
--- a/docs/evaluation/how_to_guides/datasets/manage_datasets_programmatically.mdx
+++ b/docs/evaluation/how_to_guides/datasets/manage_datasets_programmatically.mdx
@@ -382,9 +382,9 @@ Additionally, you can also chain multiple filters together using the `and` opera
   tabs={[
     PythonBlock(
       `examples = client.list_examples(
-    dataset_name=dataset_name,
-    filter='and(not(has(metadata, \\'{"foo": "bar"}\\')), exists(metadata, "tenant_id"))'
-    )`
+    dataset_name=dataset_name,
+    filter='and(not(has(metadata, \\'{"foo": "bar"}\\')), exists(metadata, "tenant_id"))'
+)`
     ),
     TypeScriptBlock(
       `const examples = await client.listExamples({datasetName: datasetName, filter: 'and(not(has(metadata, \\'{"foo": "bar"}\\')), exists(metadata, "tenant_id"))'});`
diff --git a/docs/evaluation/how_to_guides/evaluation/async.mdx b/docs/evaluation/how_to_guides/evaluation/async.mdx
index 92ab9f2c..6741c635 100644
--- a/docs/evaluation/how_to_guides/evaluation/async.mdx
+++ b/docs/evaluation/how_to_guides/evaluation/async.mdx
@@ -1,8 +1,4 @@
-import {
-  CodeTabs,
-  python,
-  typescript,
-} from "@site/src/components/InstructionsWithCode";
+import { CodeTabs, python } from "@site/src/components/InstructionsWithCode";
 
 # How to run an evaluation asynchronously
 
@@ -29,7 +25,7 @@ You can see how to use it [here](../../how_to_guides/evaluation/evaluate_llm_app
 =0.1.145`"})`
     from langsmith import aevaluate, wrappers, Client
     from openai import AsyncOpenAI
@@ -57,22 +53,22 @@ list 5 concrete questions that should be investigated to determine if the idea i
         return len(output["output"]) < 3 * len(inputs["idea"])
 
     ls_client = Client()
-    # TODO
-    dataset = ...
-    results = aevaluate(
+    examples = ["universal basic income", "nuclear fusion", "hyperloop", "nuclear powered rockets"]
+    dataset = ls_client.create_dataset("research ideas")
+    ls_client.create_examples(
+        dataset_name=dataset.name,
+        inputs=[{"idea": e} for e in examples],
+    )
+
+    results = await aevaluate(
         researcher_app,
         data=dataset,
         evaluators=[concise],
         max_concurrency=2, # Optional, no max by default
-        experiment_prefix="gpt-4o-mini, baseline" # Optional, random by default
+        experiment_prefix="gpt-4o-mini-baseline" # Optional, random by default
     )
     `,
-    typescript`
-    import type { EvaluationResult } from "langsmith/evaluation";
-    import type { Run, Example } from "langsmith/schemas";
-
-    `,
   ]}
 />
diff --git a/docs/evaluation/how_to_guides/evaluation/builtin_evaluators.mdx b/docs/evaluation/how_to_guides/evaluation/builtin_evaluators.mdx
deleted file mode 100644
index e69de29b..00000000
diff --git a/docs/evaluation/how_to_guides/evaluation/evaluate_llm_application.mdx b/docs/evaluation/how_to_guides/evaluation/evaluate_llm_application.mdx
index 822e381d..b2bf0361 100644
--- a/docs/evaluation/how_to_guides/evaluation/evaluate_llm_application.mdx
+++ b/docs/evaluation/how_to_guides/evaluation/evaluate_llm_application.mdx
@@ -12,7 +12,7 @@ import {
 
 :::info Key concepts
 
-[Evaluations](../../concepts#applying-evaluations) | [Evaluators](../../concepts#evaluators) | [Datasets](../../concepts#datasets) | [Experiments](../../concepts#experiments)
+[Evaluations](../../concepts#applying-evaluations) | [Evaluators](../../concepts#evaluators) | [Datasets](../../concepts#datasets)
 
 :::
 
@@ -232,7 +232,7 @@ _If you've annotated your code for tracing, you can open the trace of each row i
 =0.1.145`"})`
     from langsmith import Client, evaluate, traceable, wrappers
     from openai import OpenAI
diff --git a/docs/evaluation/how_to_guides/evaluation/langgraph.mdx b/docs/evaluation/how_to_guides/evaluation/langgraph.mdx
index 82ae0e95..4f6429fe 100644
--- a/docs/evaluation/how_to_guides/evaluation/langgraph.mdx
+++ b/docs/evaluation/how_to_guides/evaluation/langgraph.mdx
@@ -204,7 +204,7 @@ If any of your nodes are defined as async, you'll need to use `aevaluate`
     # Remember that langgraph graphs are also langchain runnables.
     target = example_to_state | app
 
-    experiment_results = aevaluate(
+    experiment_results = await aevaluate(
         target,
         data="weather agent",
         evaluators=[correct],
@@ -236,7 +236,7 @@ For example, we can look at the messages to assert that the model invoked the 's
         tool_calls = outputs["messages"][1].tool_calls
         return bool(tool_calls and tool_calls[0]["name"] == "search")
 
-    experiment_results = aevaluate(
+    experiment_results = await aevaluate(
         target,
         data="weather agent",
         evaluators=[correct, right_tool],
@@ -272,7 +272,7 @@ See more about what arguments you can pass to custom evaluators in this [how-to
         right_tool = bool(tool_calls and tool_calls[0]["name"] == "search")
         return {"key": "right_tool", "value": right_tool}
 
-    experiment_results = aevaluate(
+    experiment_results = await aevaluate(
         target,
         data="weather agent",
         evaluators=[correct, right_tool_from_run],
@@ -299,7 +299,7 @@ In this case we can even continue using the evaluators we've been using.
 
     node_target = example_to_state | app.nodes["agent"]
 
-    node_experiment_results = aevaluate(
+    node_experiment_results = await aevaluate(
         node_target,
         data="weather agent",
         evaluators=[right_tool_from_run],
@@ -450,7 +450,7 @@ In this case we can even continue using the evaluators we've been using.
     # Run evaluation
-    experiment_results = aevaluate(
+    experiment_results = await aevaluate(
         target,
         data="weather agent",
         evaluators=[correct, right_tool],
diff --git a/docs/evaluation/how_to_guides/index.md b/docs/evaluation/how_to_guides/index.md
index 5fe3ea1e..698f00fd 100644
--- a/docs/evaluation/how_to_guides/index.md
+++ b/docs/evaluation/how_to_guides/index.md
@@ -19,7 +19,7 @@ Evaluate and improve your application before deploying it.
 - [Evaluate a `langgraph` graph](./how_to_guides/evaluation/langgraph)
 - [Run an evaluation of an existing experiment](./how_to_guides/evaluation/evaluate_existing_experiment)
 - [Run an evaluation via the REST API](./how_to_guides/evaluation/run_evals_api_only)
-- [Run an evaluation from the prompt playground](./how_to_guides/evaluation/run_evaluation_from_prompt_playground)
+- [Run an evaluation from the UI](./how_to_guides/evaluation/run_evaluation_from_prompt_playground)
 
 ### Define an evaluator
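
Note on the async.mdx hunk above: `aevaluate` is a coroutine, so the snippet is meant to run inside an async context. Below is a minimal, self-contained sketch of that flow under stated assumptions: the `researcher_app` stub and the "research ideas" dataset name are illustrative stand-ins for the guide's real GPT-4o-mini app, and `LANGSMITH_API_KEY` is assumed to be set in the environment.

```python
# Sketch only: mirrors the flow added to async.mdx. Requires langsmith>=0.1.145.
# researcher_app is a stand-in target, not the guide's actual application.
import asyncio

from langsmith import Client, aevaluate


async def researcher_app(inputs: dict) -> dict:
    # A real target would call an LLM here; this stub just echoes the idea.
    return {"output": f"Open questions about {inputs['idea']}"}


def concise(inputs: dict, outputs: dict) -> bool:
    # Same heuristic as the guide: the answer shouldn't be much longer than the idea.
    return len(outputs["output"]) < 3 * len(inputs["idea"])


async def main() -> None:
    ls_client = Client()

    examples = ["universal basic income", "nuclear fusion"]
    dataset = ls_client.create_dataset("research ideas")  # errors if it already exists
    ls_client.create_examples(
        dataset_name=dataset.name,
        inputs=[{"idea": e} for e in examples],
    )

    # aevaluate must be awaited, hence the async main() wrapper.
    await aevaluate(
        researcher_app,
        data=dataset,
        evaluators=[concise],
        max_concurrency=2,
        experiment_prefix="gpt-4o-mini-baseline",
    )


asyncio.run(main())
```

In a notebook you can `await aevaluate(...)` directly instead of wrapping the call in `asyncio.run`.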
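
For the chained-filter hunk in manage_datasets_programmatically.mdx, the same call written as plain Python rather than a PythonBlock template might look like the sketch below; `"my-dataset"` is a placeholder name.

```python
# Sketch of the chained metadata filter; "my-dataset" is a placeholder dataset name.
from langsmith import Client

client = Client()
examples = client.list_examples(
    dataset_name="my-dataset",
    # Keep examples whose metadata does not contain {"foo": "bar"} and that do have a tenant_id key.
    filter='and(not(has(metadata, \'{"foo": "bar"}\')), exists(metadata, "tenant_id"))',
)
for example in examples:
    print(example.id, example.metadata)
```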