From d8b3fc42be9cc587af1d873c2d070d09951961f6 Mon Sep 17 00:00:00 2001
From: Bagatur
Date: Sat, 23 Nov 2024 13:58:07 -0500
Subject: [PATCH] fix

---
 .../evaluation/custom_evaluator.mdx          |  4 +--
 .../how_to_guides/evaluation/metric_type.mdx | 25 +++++++------------
 vercel.json                                  |  4 +--
 3 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/docs/evaluation/how_to_guides/evaluation/custom_evaluator.mdx b/docs/evaluation/how_to_guides/evaluation/custom_evaluator.mdx
index 475cbf8f..658c770e 100644
--- a/docs/evaluation/how_to_guides/evaluation/custom_evaluator.mdx
+++ b/docs/evaluation/how_to_guides/evaluation/custom_evaluator.mdx
@@ -40,8 +40,8 @@ These functions can be passed directly into [evaluate()](https://langsmith-sdk.r
       import type { EvaluationResult } from "langsmith/evaluation";
       import type { Run, Example } from "langsmith/schemas";
 
-      function correct(rootRun: Run, example: Example): EvaluationResult {
-        const score = rootRun.outputs?.output === example.outputs?.output;
+      function correct(run: Run, example: Example): EvaluationResult {
+        const score = run.outputs?.output === example.outputs?.output;
         return { key: "correct", score };
       }
     `,
diff --git a/docs/evaluation/how_to_guides/evaluation/metric_type.mdx b/docs/evaluation/how_to_guides/evaluation/metric_type.mdx
index 5c61b3a7..371cdd92 100644
--- a/docs/evaluation/how_to_guides/evaluation/metric_type.mdx
+++ b/docs/evaluation/how_to_guides/evaluation/metric_type.mdx
@@ -6,22 +6,16 @@ import {
 
 # How to return categorical vs numerical metrics
 
-:::info Key concepts
-
-- Metrics
-
-:::
-
 LangSmith supports both categorical and numerical metrics, and you can return either when writing a [custom evaluator](../../how_to_guides/evaluation/custom_evaluator).
 
 For an evaluator result to be logged as a numerical metric, it must returned as:
 
-- an `int`, `float`, or `bool`
+- (Python only) an `int`, `float`, or `bool`
 - a dict of the form `{"key": "metric_name", "score": int | float | bool}`
 
 For an evaluator result to be logged as a categorical metric, it must be returned as:
 
-- a `str`
+- (Python only) a `str`
 - a dict of the form `{"key": "metric_name", "value": str | int | float | bool}`
 
 Here are some examples:
@@ -58,15 +52,14 @@ Here are some examples:
     })`
       import type { Run, Example } from "langsmith/schemas";
 
-      function multipleScores(rootRun: Run, example: Example) {
+      function numericalMetric(run: Run, example: Example) {
+        // Your evaluation logic here
+        return { key: "numerical_metric", score: 0.8};
+      }
+
+      function categoricalMetric(run: Run, example: Example) {
         // Your evaluation logic here
-        return {
-          results: [
-            { key: "precision", score: 0.8 },
-            { key: "recall", score: 0.9 },
-            { key: "f1", score: 0.85 },
-          ],
-        };
+        return { key: "categorical_metric", value: "english"};
       }
     `,
 
diff --git a/vercel.json b/vercel.json
index ddf7eee3..8ea82cb1 100644
--- a/vercel.json
+++ b/vercel.json
@@ -187,8 +187,8 @@
       "destination": "/prompt_engineering/tutorials/optimize_classifier"
     },
     {
-      "source": "evaluation/how_to_guides/evaluation/evaluate_llm_application#evaluate-on-a-particular-version-of-a-dataset",
-      "destination": "evaluation/how_to_guides/evaluation/dataset_version"
+      "source": "/evaluation/how_to_guides/evaluation/evaluate_llm_application#evaluate-on-a-particular-version-of-a-dataset",
+      "destination": "/evaluation/how_to_guides/evaluation/dataset_version"
     }
   ],
   "builds": [