Patch summary (free text ahead of the first "diff --git" header):
  * runner_utils.ts: applyEvaluators awaits the evaluators with
    Promise.allSettled instead of Promise.all, and stores in "feedback" either
    the fulfilled value or the rejection reason of each evaluator call.
  * run_on_dataset.int.test.ts: adds an integration test in which both the
    pipeline under test and the custom evaluator throw; the result of
    runOnDataset is only logged, no assertions are made on it.

diff --git a/langchain/src/smith/runner_utils.ts b/langchain/src/smith/runner_utils.ts
index 26c2455c74a2..c6d013438250 100644
--- a/langchain/src/smith/runner_utils.ts
+++ b/langchain/src/smith/runner_utils.ts
@@ -541,7 +541,7 @@ const applyEvaluators = async ({
   for (let i = 0; i < runs.length; i += 1) {
     const run = runs[i];
     const example = examples[i];
-    const evaluatorResults = await Promise.all(
+    const evaluatorResults = await Promise.allSettled(
       evaluators.map((evaluator) =>
         client.evaluateRun(run, evaluator, {
           referenceExample: example,
@@ -555,7 +555,9 @@ const applyEvaluators = async ({
         run?.end_time && run.start_time
           ? run.end_time - run.start_time
           : undefined,
-      feedback: evaluatorResults,
+      feedback: evaluatorResults.map((evalResult) =>
+        evalResult.status === "fulfilled" ? evalResult.value : evalResult.reason
+      ),
       run_id: run.id,
     };
   }
diff --git a/langchain/src/smith/tests/run_on_dataset.int.test.ts b/langchain/src/smith/tests/run_on_dataset.int.test.ts
index 29ad9f8b80c7..54f8079fbfea 100644
--- a/langchain/src/smith/tests/run_on_dataset.int.test.ts
+++ b/langchain/src/smith/tests/run_on_dataset.int.test.ts
@@ -251,3 +251,69 @@ test(`Chat model dataset`, async () => {
     })
   );
 });
+
+test("Thrown errors should not interrupt dataset run", async () => {
+  async function ragPipeline(_: string): Promise<string> {
+    throw new Error("I don't know, I am learning from aliens.");
+  }
+
+  const examples = [
+    [
+      "When was the Apple Vision Pro released in the US?",
+      "The Apple Vision Pro was released in the United States on February 2, 2024.",
+    ],
+    [
+      "What is LangChain?",
+      "LangChain is an open-source framework for building applications using large language models.",
+    ],
+    [
+      "Who is the chairman of OpenAI?",
+      "Bret Taylor is the chairman of the OpenAI",
+    ],
+  ];
+
+  const lsClient = new Client();
+  const datasetName = "JS run on dataset integration test";
+  let dataset: Dataset;
+  try {
+    dataset = await lsClient.readDataset({ datasetName });
+  } catch (e) {
+    dataset = await lsClient.createDataset(datasetName);
+    await Promise.all(
+      examples.map(async ([question, answer]) => {
+        await lsClient.createExample(
+          { question },
+          { answer },
+          { datasetId: dataset.id }
+        );
+      })
+    );
+  }
+
+  // An illustrative custom evaluator example
+  const dummy = async (_: DynamicRunEvaluatorParams) => {
+    console.log("RUNNING EVAL");
+    throw new Error("Expected error");
+  };
+
+  const evaluation: RunEvalConfig = {
+    // Custom evaluators can be user-defined RunEvaluator's
+    // or a compatible function
+    customEvaluators: [dummy],
+  };
+
+  const wrappedRagPipeline = async ({
+    question,
+  }: {
+    question: string;
+  }): Promise<string> => {
+    return ragPipeline(question);
+  };
+
+  console.log(
+    await runOnDataset(wrappedRagPipeline, datasetName, {
+      evaluationConfig: evaluation,
+      maxConcurrency: 1,
+    })
+  );
+});