Commit

fix
baskaryan committed Nov 23, 2024
1 parent f6507d0 commit d8b3fc4
Showing 3 changed files with 13 additions and 20 deletions.
docs/evaluation/how_to_guides/evaluation/custom_evaluator.mdx (4 changes: 2 additions & 2 deletions)

@@ -40,8 +40,8 @@ These functions can be passed directly into [evaluate()](https://langsmith-sdk.r
 import type { EvaluationResult } from "langsmith/evaluation";
 import type { Run, Example } from "langsmith/schemas";
-function correct(rootRun: Run, example: Example): EvaluationResult {
-  const score = rootRun.outputs?.output === example.outputs?.output;
+function correct(run: Run, example: Example): EvaluationResult {
+  const score = run.outputs?.output === example.outputs?.output;
   return { key: "correct", score };
 }
 `,
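
For reference, the corrected evaluator reads as a self-contained TypeScript sketch like the one below. The `correct` function comes straight from the diff; the `evaluate()` call, the dataset name "my-dataset", and the target function are illustrative assumptions rather than part of this commit.

import { evaluate } from "langsmith/evaluation";
import type { EvaluationResult } from "langsmith/evaluation";
import type { Run, Example } from "langsmith/schemas";

// Exact-match evaluator from the corrected docs snippet: compare the run's
// output with the example's reference output and log a boolean "correct" score.
function correct(run: Run, example: Example): EvaluationResult {
  const score = run.outputs?.output === example.outputs?.output;
  return { key: "correct", score };
}

// Hypothetical wiring: the target function and "my-dataset" are placeholders.
await evaluate((inputs) => ({ output: inputs["question"] }), {
  data: "my-dataset",
  evaluators: [correct],
});

Returning `{ key, score }` is what lets LangSmith log the result under the `correct` feedback key for each run.
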
docs/evaluation/how_to_guides/evaluation/metric_type.mdx (25 changes: 9 additions & 16 deletions)

@@ -6,22 +6,16 @@ import {
 
 # How to return categorical vs numerical metrics
 
-:::info Key concepts
-
-- Metrics
-
-:::
-
 LangSmith supports both categorical and numerical metrics, and you can return either when writing a [custom evaluator](../../how_to_guides/evaluation/custom_evaluator).
 
 For an evaluator result to be logged as a numerical metric, it must be returned as:
 
-- an `int`, `float`, or `bool`
+- (Python only) an `int`, `float`, or `bool`
 - a dict of the form `{"key": "metric_name", "score": int | float | bool}`
 
 For an evaluator result to be logged as a categorical metric, it must be returned as:
 
-- a `str`
+- (Python only) a `str`
 - a dict of the form `{"key": "metric_name", "value": str | int | float | bool}`
 
 Here are some examples:
@@ -58,15 +52,14 @@ Here are some examples:
 })`
 import type { Run, Example } from "langsmith/schemas";
-function multipleScores(rootRun: Run, example: Example) {
+function numericalMetric(run: Run, example: Example) {
   // Your evaluation logic here
-  return {
-    results: [
-      { key: "precision", score: 0.8 },
-      { key: "recall", score: 0.9 },
-      { key: "f1", score: 0.85 },
-    ],
-  };
+  return { key: "numerical_metric", score: 0.8};
+}
+
+function categoricalMetric(run: Run, example: Example) {
+  // Your evaluation logic here
+  return { key: "categorical_metric", value: "english"};
 }
 `,

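
Read against the rules above, the replacement snippet pairs one evaluator with each metric type: an object with a `score` field is logged as a numerical metric, and one with a `value` field is logged as a categorical metric. Below is a minimal TypeScript sketch reassembled from the diff with explanatory comments; the evaluator bodies beyond the return statements are placeholders.

import type { Run, Example } from "langsmith/schemas";

// An object with a "score" field is logged as a numerical metric.
function numericalMetric(run: Run, example: Example) {
  // Your evaluation logic here
  return { key: "numerical_metric", score: 0.8 };
}

// An object with a "value" field is logged as a categorical metric.
function categoricalMetric(run: Run, example: Example) {
  // Your evaluation logic here
  return { key: "categorical_metric", value: "english" };
}
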
vercel.json (4 changes: 2 additions & 2 deletions)

@@ -187,8 +187,8 @@
       "destination": "/prompt_engineering/tutorials/optimize_classifier"
     },
     {
-      "source": "evaluation/how_to_guides/evaluation/evaluate_llm_application#evaluate-on-a-particular-version-of-a-dataset",
-      "destination": "evaluation/how_to_guides/evaluation/dataset_version"
+      "source": "/evaluation/how_to_guides/evaluation/evaluate_llm_application#evaluate-on-a-particular-version-of-a-dataset",
+      "destination": "/evaluation/how_to_guides/evaluation/dataset_version"
     }
   ],
   "builds": [
