Skip to content

Commit

Permalink
Make thrown evaluator errors not interrupt dataset flow
Browse files Browse the repository at this point in the history
  • Loading branch information
jacoblee93 committed Apr 8, 2024
1 parent d6e25af commit b49f0ad
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 2 deletions.
6 changes: 4 additions & 2 deletions langchain/src/smith/runner_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ const applyEvaluators = async ({
for (let i = 0; i < runs.length; i += 1) {
const run = runs[i];
const example = examples[i];
const evaluatorResults = await Promise.all(
const evaluatorResults = await Promise.allSettled(
evaluators.map((evaluator) =>
client.evaluateRun(run, evaluator, {
referenceExample: example,
Expand All @@ -555,7 +555,9 @@ const applyEvaluators = async ({
run?.end_time && run.start_time
? run.end_time - run.start_time
: undefined,
feedback: evaluatorResults,
feedback: evaluatorResults.map((evalResult) =>
evalResult.status === "fulfilled" ? evalResult.value : evalResult.reason
),
run_id: run.id,
};
}
Expand Down
66 changes: 66 additions & 0 deletions langchain/src/smith/tests/run_on_dataset.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,69 @@ test(`Chat model dataset`, async () => {
})
);
});

/**
 * Integration test: runs a pipeline that always throws, scored by a custom
 * evaluator that also always throws, over a small dataset.
 *
 * There are no explicit assertions; the test passes as long as the awaited
 * `runOnDataset` call resolves, and its result is logged for inspection.
 */
test("Thrown errors should not interrupt dataset run", async () => {
  // Stand-in for a real pipeline: rejects on every invocation.
  const ragPipeline = async (_: string): Promise<string> => {
    throw new Error("I don't know, I am learning from aliens.");
  };

  // Question/answer pairs used to seed the dataset when it has to be created.
  const seedExamples = [
    {
      question: "When was the Apple Vision Pro released in the US?",
      answer:
        "The Apple Vision Pro was released in the United States on February 2, 2024.",
    },
    {
      question: "What is LangChain?",
      answer:
        "LangChain is an open-source framework for building applications using large language models.",
    },
    {
      question: "Who is the chairman of OpenAI?",
      answer: "Bret Taylor is the chairman of the OpenAI",
    },
  ];

  const lsClient = new Client();
  const datasetName = "JS run on dataset integration test";

  // Reuse the dataset if it can be read; on any read failure, create it and
  // upload all seed examples concurrently.
  const ensureDataset = async (): Promise<Dataset> => {
    try {
      return await lsClient.readDataset({ datasetName });
    } catch (e) {
      const created = await lsClient.createDataset(datasetName);
      await Promise.all(
        seedExamples.map(({ question, answer }) =>
          lsClient.createExample(
            { question },
            { answer },
            { datasetId: created.id }
          )
        )
      );
      return created;
    }
  };
  await ensureDataset();

  // Custom evaluator that logs once and then fails on purpose.
  const dummy = async (_: DynamicRunEvaluatorParams) => {
    console.log("RUNNING EVAL");
    throw new Error("Expected error");
  };

  // Evaluation config carrying only the failing custom evaluator.
  const evaluation: RunEvalConfig = {
    customEvaluators: [dummy],
  };

  // Adapts the `{ question }` example input to the pipeline's string argument.
  const wrappedRagPipeline = async ({
    question,
  }: {
    question: string;
  }): Promise<string> => ragPipeline(question);

  const outcome = await runOnDataset(wrappedRagPipeline, datasetName, {
    evaluationConfig: evaluation,
    maxConcurrency: 1,
  });
  console.log(outcome);
});

0 comments on commit b49f0ad

Please sign in to comment.