Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
baskaryan committed Dec 4, 2024
1 parent 6720e9d commit 02c1069
Showing 1 changed file with 28 additions and 9 deletions.
37 changes: 28 additions & 9 deletions docs/evaluation/how_to_guides/summary.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,20 @@ Below, we'll implement a very simple summary evaluator that computes overall pas
python`
def pass_50(outputs: list[dict], reference_outputs: list[dict]) -> bool:
"""Pass if >50% of all results are correct."""
correct = sum([out["output"] == ref["label"] for out, ref in zip(outputs, reference_outputs)])
correct = sum([out["class"] == ref["label"] for out, ref in zip(outputs, reference_outputs)])
return correct / len(outputs) > 0.5
`,
typescript`
import { Run, Example } from "langsmith/schemas";
function summaryEval(runs: Run[], examples: Example[]) {
function summaryEval({ outputs, referenceOutputs }: { outputs: Record<string, any>[], referenceOutputs?: Record<string, any>[]}) {
let correct = 0;
for (let i = 0; i < runs.length; i++) {
if (runs[i].outputs["output"] === examples[i].outputs["label"]) {
for (let i = 0; i < outputs.length; i++) {
if (outputs[i]["output"] === referenceOutputs[i]["label"]) {
correct += 1;
}
}
return { key: "pass", score: correct / runs.length > 0.5 };
return { key: "pass", score: correct / outputs.length > 0.5 };
}
`,
]}
Expand Down Expand Up @@ -67,9 +65,30 @@ You can then pass this evaluator to the `evaluate` method as follows:
)
`,
typescript`
await evaluate((inputs) => labelQuery(inputs["input"]), {
import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";
import type { EvaluationResult } from "langsmith/evaluation";
const client = new Client();
const datasetName = "Toxic queries";
const dataset = await client.clonePublicDataset(
"https://smith.langchain.com/public/3d6831e6-1680-4c88-94df-618c8e01fc55/d",
{ datasetName: datasetName }
);
function correct({ outputs, referenceOutputs }: { outputs: Record<string, any>, referenceOutputs?: Record<string, any> }): EvaluationResult {
const score = outputs["class"] === referenceOutputs?.["label"];
return { key: "correct", score };
}
function badClassifier(inputs: Record<string, any>): { class: string } {
return { class: "Not toxic" };
}
await evaluate(badClassifier, {
data: datasetName,
evaluators: [correctLabel],
evaluators: [correct],
summaryEvaluators: [summaryEval],
experimentPrefix: "Toxic Queries",
});
Expand Down

0 comments on commit 02c1069

Please sign in to comment.