Commit 6c23581: fmt

baskaryan committed Nov 23, 2024 (1 parent: 8ebd71e)

Showing 6 changed files with 22 additions and 26 deletions.
@@ -382,9 +382,9 @@ Additionally, you can also chain multiple filters together using the `and` operator
tabs={[
PythonBlock(
`examples = client.list_examples(
- dataset_name=dataset_name,
- filter='and(not(has(metadata, \\'{"foo": "bar"}\\')), exists(metadata, "tenant_id"))'
- )`
+ dataset_name=dataset_name,
+ filter='and(not(has(metadata, \\'{"foo": "bar"}\\')), exists(metadata, "tenant_id"))'
+ )`
),
TypeScriptBlock(
`const examples = await client.listExamples({datasetName: datasetName, filter: 'and(not(has(metadata, \\'{"foo": "bar"}\\')), exists(metadata, "tenant_id"))'});`
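For readability, a minimal unescaped sketch of the Python call rendered by the block above (the dataset name is a placeholder; the filter string matches the one in the diff):

from langsmith import Client

client = Client()

# Keep only examples that do NOT have metadata {"foo": "bar"}
# but DO have a "tenant_id" metadata key.
examples = client.list_examples(
    dataset_name="my-dataset",  # placeholder dataset name
    filter='and(not(has(metadata, \'{"foo": "bar"}\')), exists(metadata, "tenant_id"))',
)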
docs/evaluation/how_to_guides/evaluation/async.mdx: 26 changes (11 additions, 15 deletions)
@@ -1,8 +1,4 @@
- import {
- CodeTabs,
- python,
- typescript,
- } from "@site/src/components/InstructionsWithCode";
+ import { CodeTabs, python } from "@site/src/components/InstructionsWithCode";

# How to run an evaluation asynchronously

@@ -29,7 +25,7 @@ You can see how to use it [here](../../how_to_guides/evaluation/evaluate_llm_app
<CodeTabs
groupId="client-language"
tabs={[
- python`
+ python({caption: "Requires `langsmith>=0.1.145`"})`
from langsmith import aevaluate, wrappers, Client
from openai import AsyncOpenAI
@@ -57,22 +53,22 @@ list 5 concrete questions that should be investigated to determine if the idea i
return len(output["output"]) < 3 * len(inputs["idea"])
ls_client = Client()
- # TODO
- dataset = ...
- results = aevaluate(
+ examples = ["universal basic income", "nuclear fusion", "hyperloop", "nuclear powered rockets"]
+ dataset = ls_client.create_dataset("research ideas")
+ ls_client.create_examples(
+ dataset_name=dataset.name,
+ inputs=[{"idea": e} for e in examples],
+ )
+ results = await aevaluate(
researcher_app,
data=dataset,
evaluators=[concise],
max_concurrency=2, # Optional, no max by default
- experiment_prefix="gpt-4o-mini, baseline" # Optional, random by default
+ experiment_prefix="gpt-4o-mini-baseline" # Optional, random by default
)
`,
- typescript`
- import type { EvaluationResult } from "langsmith/evaluation";
- import type { Run, Example } from "langsmith/schemas";
- `,

]}
/>
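A usage note on the updated example: `aevaluate` is a coroutine, so the `await aevaluate(...)` form above works as-is in a notebook, but a plain script needs an event loop. A minimal self-contained sketch (the target and dataset name are hypothetical stand-ins, not from the docs):

import asyncio

from langsmith import Client, aevaluate

async def target(inputs: dict) -> dict:
    # Hypothetical stand-in for the researcher app defined above.
    return {"output": f"Five questions about {inputs['idea']}"}

async def main() -> None:
    ls_client = Client()
    dataset = ls_client.create_dataset("research ideas (sketch)")  # hypothetical name
    ls_client.create_examples(
        dataset_name=dataset.name,
        inputs=[{"idea": "universal basic income"}],
    )
    # Evaluators like `concise` above could be passed via `evaluators=[...]`.
    await aevaluate(
        target,
        data=dataset,
        max_concurrency=2,
    )

if __name__ == "__main__":
    asyncio.run(main())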
Empty file.
@@ -12,7 +12,7 @@ import {

:::info Key concepts

- [Evaluations](../../concepts#applying-evaluations) | [Evaluators](../../concepts#evaluators) | [Datasets](../../concepts#datasets) | [Experiments](../../concepts#experiments)
+ [Evaluations](../../concepts#applying-evaluations) | [Evaluators](../../concepts#evaluators) | [Datasets](../../concepts#datasets)

:::

@@ -232,7 +232,7 @@ _If you've annotated your code for tracing, you can open the trace of each row i
<CodeTabs
groupId="client-language"
tabs={[
- python`
+ python({caption: "Requires `langsmith>=0.1.145`"})`
from langsmith import Client, evaluate, traceable, wrappers
from openai import OpenAI
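For context on the tracing note in the hunk header above, a rough sketch of what annotating code for tracing looks like with these imports (the model, prompt, and function name are placeholders, not from this guide):

from langsmith import traceable, wrappers
from openai import OpenAI

# Wrapping the OpenAI client records every model call as a child run,
# so each row of an experiment links back to a full trace.
oai_client = wrappers.wrap_openai(OpenAI())

@traceable
def my_app(question: str) -> str:
    # Hypothetical single-call app; the guide defines its own target.
    resp = oai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": question}],
    )
    return resp.choices[0].message.content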
docs/evaluation/how_to_guides/evaluation/langgraph.mdx: 10 changes (5 additions, 5 deletions)
@@ -204,7 +204,7 @@ If any of your nodes are defined as async, you'll need to use `aevaluate`
# Remember that langgraph graphs are also langchain runnables.
target = example_to_state | app
- experiment_results = aevaluate(
+ experiment_results = await aevaluate(
target,
data="weather agent",
evaluators=[correct],
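For context on "async nodes", a bare sketch of an async LangGraph node (the graph structure and names are hypothetical, not the weather agent from this guide):

from langgraph.graph import END, START, MessagesState, StateGraph

async def agent(state: MessagesState) -> dict:
    # Hypothetical async node, e.g. one that awaits an async LLM client.
    return {"messages": []}

builder = StateGraph(MessagesState)
builder.add_node("agent", agent)
builder.add_edge(START, "agent")
builder.add_edge("agent", END)
example_app = builder.compile()

# Because the graph contains an async node, evaluate it with
# `await aevaluate(...)` as above rather than the sync `evaluate(...)`.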
@@ -236,7 +236,7 @@ For example, we can look at the messages to assert that the model invoked the 'search' tool
tool_calls = outputs["messages"][1].tool_calls
return bool(tool_calls and tool_calls[0]["name"] == "search")
- experiment_results = aevaluate(
+ experiment_results = await aevaluate(
target,
data="weather agent",
evaluators=[correct, right_tool],
@@ -272,7 +272,7 @@ See more about what arguments you can pass to custom evaluators in this [how-to
right_tool = bool(tool_calls and tool_calls[0]["name"] == "search")
return {"key": "right_tool", "value": right_tool}
- experiment_results = aevaluate(
+ experiment_results = await aevaluate(
target,
data="weather agent",
evaluators=[correct, right_tool_from_run],
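For reference on the evaluator arguments mentioned in the hunk header above, a sketch of the run-based form; the message layout under run.outputs is an assumption about the target graph, not guaranteed:

from langsmith.schemas import Example, Run

def right_tool_from_run(run: Run, example: Example) -> dict:
    # Assumes the target's final state is stored on run.outputs and that
    # the second message is the AI response carrying the tool calls.
    tool_calls = run.outputs["messages"][1].tool_calls
    right_tool = bool(tool_calls and tool_calls[0]["name"] == "search")
    return {"key": "right_tool", "value": right_tool}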
@@ -299,7 +299,7 @@ In this case we can even continue using the evaluators we've been using.
node_target = example_to_state | app.nodes["agent"]
- node_experiment_results = aevaluate(
+ node_experiment_results = await aevaluate(
node_target,
data="weather agent",
evaluators=[right_tool_from_run],
@@ -450,7 +450,7 @@ In this case we can even continue using the evaluators we've been using.
# Run evaluation
- experiment_results = aevaluate(
+ experiment_results = await aevaluate(
target,
data="weather agent",
evaluators=[correct, right_tool],
docs/evaluation/how_to_guides/index.md: 2 changes (1 addition, 1 deletion)
@@ -19,7 +19,7 @@ Evaluate and improve your application before deploying it.
- [Evaluate a `langgraph` graph](./how_to_guides/evaluation/langgraph)
- [Run an evaluation of an existing experiment](./how_to_guides/evaluation/evaluate_existing_experiment)
- [Run an evaluation via the REST API](./how_to_guides/evaluation/run_evals_api_only)
- - [Run an evaluation from the prompt playground](./how_to_guides/evaluation/run_evaluation_from_prompt_playground)
+ - [Run an evaluation from the UI](./how_to_guides/evaluation/run_evaluation_from_prompt_playground)

### Define an evaluator

