From e420b1841fc04a0a64965088a006659befcecaf0 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Thu, 14 Nov 2024 11:58:39 -0800 Subject: [PATCH 01/23] draft --- js/src/client.ts | 71 +++++++++++++++++++++ js/src/schemas.ts | 9 +++ js/src/tests/client.test.ts | 122 ++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) diff --git a/js/src/client.ts b/js/src/client.ts index 8c094e7fd..6149d188e 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -35,6 +35,8 @@ import { AnnotationQueue, RunWithAnnotationQueueInfo, Attachments, + ExampleUpsertWithAttachments, + UpsertExamplesResponse } from "./schemas.js"; import { convertLangChainMessageToExample, @@ -3832,6 +3834,75 @@ export class Client { ); } + /** + * Upsert examples with attachments using multipart form data. + * @param upserts List of ExampleUpsertWithAttachments objects to upsert + * @returns Promise with the upsert response + */ + public async upsertExamplesMultipart({ + upserts = [], + }: { + upserts?: ExampleUpsertWithAttachments[]; + }): Promise { + const formData = new FormData(); + + for (const example of upserts) { + const exampleId = (example.id ?? uuid.v4()).toString(); + + // Prepare the main example body + const exampleBody = { + dataset_id: example.dataset_id, + created_at: example.created_at, + ...(example.metadata && { metadata: example.metadata }), + ...(example.split && { split: example.split }), + }; + + // Add main example data + const stringifiedExample = stringifyForTracing(exampleBody); + const exampleBlob = new Blob([stringifiedExample], { + type: "application/json" + }); + formData.append(exampleId, exampleBlob); + + // Add inputs + const stringifiedInputs = stringifyForTracing(example.inputs); + const inputsBlob = new Blob([stringifiedInputs], { + type: "application/json" + }); + formData.append(`${exampleId}.inputs`, inputsBlob); + + // Add outputs if present + if (example.outputs) { + const stringifiedOutputs = stringifyForTracing(example.outputs); + const outputsBlob = new Blob([stringifiedOutputs], { + type: "application/json" + }); + formData.append(`${exampleId}.outputs`, outputsBlob); + } + + // Add attachments if present + if (example.attachments) { + for (const [name, [mimeType, data]] of Object.entries(example.attachments)) { + const attachmentBlob = new Blob([data], { type: `${mimeType}; length=${data.byteLength}` }); + formData.append(`${exampleId}.attachment.${name}`, attachmentBlob); + } + } + } + + const response = await this.caller.call( + _getFetchImplementation(), + `${this.apiUrl}/v1/platform/examples/multipart`, + { + method: "POST", + headers: this.headers, + body: formData, + } + ); + const result = await response.json(); + return result; + + } + public async updatePrompt( promptIdentifier: string, options?: { diff --git a/js/src/schemas.ts b/js/src/schemas.ts index 26afd7fc0..90dfc5186 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -248,6 +248,15 @@ export interface ExampleCreate extends BaseExample { split?: string | string[]; } +export interface ExampleUpsertWithAttachments extends ExampleCreate { + attachments?: Attachments; +} + +export interface UpsertExamplesResponse { + count: number; + example_ids: string[]; +} + export interface Example extends BaseExample { id: string; created_at: string; diff --git a/js/src/tests/client.test.ts b/js/src/tests/client.test.ts index d86c0dc24..02871ba1a 100644 --- a/js/src/tests/client.test.ts +++ b/js/src/tests/client.test.ts @@ -1,5 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { jest } from "@jest/globals"; +import { v4 as uuidv4 } from "uuid"; import { Client } from "../client.js"; import { getEnvironmentVariables, @@ -10,6 +11,10 @@ import { isVersionGreaterOrEqual, parsePromptIdentifier, } from "../utils/prompts.js"; +import { ExampleUpsertWithAttachments } from "../schemas.js"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; describe("Client", () => { describe("createLLMExample", () => { @@ -249,4 +254,121 @@ describe("Client", () => { }); }); }); + + describe("upsertExamplesMultipart", () => { + it("should upsert examples with attachments via multipart endpoint", async () => { + const datasetName = `__test_upsert_examples_multipart${uuidv4().slice(0, 4)}`; + // NEED TO FIX THIS AFTER ENDPOINT MAKES IT TO PROD + const client = new Client({ + apiUrl: "https://dev.api.smith.langchain.com", + apiKey: "HARDCODE FOR TESTING" + }); + + // Clean up existing dataset if it exists + if (await client.hasDataset({ datasetName })) { + await client.deleteDataset({ datasetName }); + } + + // Create actual dataset + const dataset = await client.createDataset( + datasetName, { + description: "Test dataset for multipart example upload", + dataType: "kv" + } + ); + + const pathname = path.join( + path.dirname(fileURLToPath(import.meta.url)), + "test_data", + "parrot-icon.png" + ); + // Create test examples + const exampleId = uuidv4(); + const example1: ExampleUpsertWithAttachments = { + id: exampleId, + dataset_id: dataset.id, + inputs: { text: "hello world" }, + // check that passing no outputs works fine + attachments: { + test_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const example2: ExampleUpsertWithAttachments = { + dataset_id: dataset.id, + inputs: { text: "foo bar" }, + outputs: { response: "baz" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + // Test creating examples + const createdExamples = await client.upsertExamplesMultipart({ + upserts: [ + example1, + example2, + ]}); + + expect(createdExamples.count).toBe(2); + + const createdExample1 = await client.readExample( + createdExamples.example_ids[0] + ); + expect(createdExample1.inputs["text"]).toBe("hello world"); + + const createdExample2 = await client.readExample( + createdExamples.example_ids[1] + ); + expect(createdExample2.inputs["text"]).toBe("foo bar"); + expect(createdExample2.outputs?.["response"]).toBe("baz"); + + // Test examples were sent to correct dataset + const allExamplesInDataset = []; + for await (const example of client.listExamples({ + datasetId: dataset.id, + })) { + allExamplesInDataset.push(example); + } + expect(allExamplesInDataset.length).toBe(2); + + // Test updating example + const example1Update: ExampleUpsertWithAttachments = { + id: exampleId, + dataset_id: dataset.id, + inputs: { text: "bar baz" }, + outputs: { response: "foo" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const updatedExamples = await client.upsertExamplesMultipart({ + upserts: [ + example1Update, + ]}); + expect(updatedExamples.count).toBe(1); + expect(updatedExamples.example_ids[0]).toBe(exampleId); + + const updatedExample = await client.readExample(updatedExamples.example_ids[0]); + expect(updatedExample.inputs["text"]).toBe("bar baz"); + expect(updatedExample.outputs?.["response"]).toBe("foo"); + + // Test invalid example fails + const example3: ExampleUpsertWithAttachments = { + dataset_id: uuidv4(), // not a real dataset + inputs: { text: "foo bar" }, + outputs: { response: "baz" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const errorResponse = await client.upsertExamplesMultipart({ upserts: [example3] }); + expect(errorResponse).toHaveProperty("error"); + + // Clean up + await client.deleteDataset({ datasetName }); + }); + }); }); From f778d26fc84b62fcac1769ab158495cc7d856c6a Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 14:46:53 -0800 Subject: [PATCH 02/23] ankush comments --- js/src/client.ts | 105 +++++++++++++-------------- js/src/tests/client.int.test.ts | 113 +++++++++++++++++++++++++++++ js/src/tests/client.test.ts | 122 -------------------------------- 3 files changed, 166 insertions(+), 174 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 6149d188e..3d441ee03 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -36,7 +36,7 @@ import { RunWithAnnotationQueueInfo, Attachments, ExampleUpsertWithAttachments, - UpsertExamplesResponse + UpsertExamplesResponse, } from "./schemas.js"; import { convertLangChainMessageToExample, @@ -3839,68 +3839,69 @@ export class Client { * @param upserts List of ExampleUpsertWithAttachments objects to upsert * @returns Promise with the upsert response */ - public async upsertExamplesMultipart({ - upserts = [], - }: { - upserts?: ExampleUpsertWithAttachments[]; - }): Promise { + public async upsertExamplesMultipart( + upserts: ExampleUpsertWithAttachments[] = [] + ): Promise { const formData = new FormData(); - for (const example of upserts) { - const exampleId = (example.id ?? uuid.v4()).toString(); - - // Prepare the main example body - const exampleBody = { - dataset_id: example.dataset_id, - created_at: example.created_at, - ...(example.metadata && { metadata: example.metadata }), - ...(example.split && { split: example.split }), - }; + for (const example of upserts) { + const exampleId = (example.id ?? uuid.v4()).toString(); - // Add main example data - const stringifiedExample = stringifyForTracing(exampleBody); - const exampleBlob = new Blob([stringifiedExample], { - type: "application/json" - }); - formData.append(exampleId, exampleBlob); + // Prepare the main example body + const exampleBody = { + dataset_id: example.dataset_id, + created_at: example.created_at, + ...(example.metadata && { metadata: example.metadata }), + ...(example.split && { split: example.split }), + }; - // Add inputs - const stringifiedInputs = stringifyForTracing(example.inputs); - const inputsBlob = new Blob([stringifiedInputs], { - type: "application/json" - }); - formData.append(`${exampleId}.inputs`, inputsBlob); + // Add main example data + const stringifiedExample = stringifyForTracing(exampleBody); + const exampleBlob = new Blob([stringifiedExample], { + type: "application/json", + }); + formData.append(exampleId, exampleBlob); - // Add outputs if present - if (example.outputs) { - const stringifiedOutputs = stringifyForTracing(example.outputs); - const outputsBlob = new Blob([stringifiedOutputs], { - type: "application/json" + // Add inputs + const stringifiedInputs = stringifyForTracing(example.inputs); + const inputsBlob = new Blob([stringifiedInputs], { + type: "application/json", }); - formData.append(`${exampleId}.outputs`, outputsBlob); - } + formData.append(`${exampleId}.inputs`, inputsBlob); - // Add attachments if present - if (example.attachments) { - for (const [name, [mimeType, data]] of Object.entries(example.attachments)) { - const attachmentBlob = new Blob([data], { type: `${mimeType}; length=${data.byteLength}` }); - formData.append(`${exampleId}.attachment.${name}`, attachmentBlob); + // Add outputs if present + if (example.outputs) { + const stringifiedOutputs = stringifyForTracing(example.outputs); + const outputsBlob = new Blob([stringifiedOutputs], { + type: "application/json", + }); + formData.append(`${exampleId}.outputs`, outputsBlob); } - } - } - const response = await this.caller.call( - _getFetchImplementation(), - `${this.apiUrl}/v1/platform/examples/multipart`, - { - method: "POST", - headers: this.headers, - body: formData, + // Add attachments if present + if (example.attachments) { + for (const [name, [mimeType, data]] of Object.entries( + example.attachments + )) { + const attachmentBlob = new Blob([data], { + type: `${mimeType}; length=${data.byteLength}`, + }); + formData.append(`${exampleId}.attachment.${name}`, attachmentBlob); + } + } } - ); - const result = await response.json(); - return result; + const response = await this.caller.call( + _getFetchImplementation(), + `${this.apiUrl}/v1/platform/examples/multipart`, + { + method: "POST", + headers: this.headers, + body: formData, + } + ); + const result = await response.json(); + return result; } public async updatePrompt( diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 7b5d63f89..d13407781 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -7,6 +7,10 @@ import { import { Client } from "../client.js"; import { v4 as uuidv4 } from "uuid"; +import { ExampleUpsertWithAttachments } from "../schemas.js"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; import { createRunsFactory, deleteDataset, @@ -1240,3 +1244,112 @@ test("annotationqueue crud", async () => { } } }); + +test("annotationqueue crud", async () => { + const client = new Client(); + const datasetName = `__test_upsert_examples_multipart${uuidv4().slice(0, 4)}`; + + // Clean up existing dataset if it exists + if (await client.hasDataset({ datasetName })) { + await client.deleteDataset({ datasetName }); + } + + // Create actual dataset + const dataset = await client.createDataset(datasetName, { + description: "Test dataset for multipart example upload", + dataType: "kv", + }); + + const pathname = path.join( + path.dirname(fileURLToPath(import.meta.url)), + "test_data", + "parrot-icon.png" + ); + // Create test examples + const exampleId = uuidv4(); + const example1: ExampleUpsertWithAttachments = { + id: exampleId, + dataset_id: dataset.id, + inputs: { text: "hello world" }, + // check that passing no outputs works fine + attachments: { + test_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const example2: ExampleUpsertWithAttachments = { + dataset_id: dataset.id, + inputs: { text: "foo bar" }, + outputs: { response: "baz" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + // Test creating examples + const createdExamples = await client.upsertExamplesMultipart([ + example1, + example2, + ]); + + expect(createdExamples.count).toBe(2); + + const createdExample1 = await client.readExample( + createdExamples.example_ids[0] + ); + expect(createdExample1.inputs["text"]).toBe("hello world"); + + const createdExample2 = await client.readExample( + createdExamples.example_ids[1] + ); + expect(createdExample2.inputs["text"]).toBe("foo bar"); + expect(createdExample2.outputs?.["response"]).toBe("baz"); + + // Test examples were sent to correct dataset + const allExamplesInDataset = []; + for await (const example of client.listExamples({ + datasetId: dataset.id, + })) { + allExamplesInDataset.push(example); + } + expect(allExamplesInDataset.length).toBe(2); + + // Test updating example + const example1Update: ExampleUpsertWithAttachments = { + id: exampleId, + dataset_id: dataset.id, + inputs: { text: "bar baz" }, + outputs: { response: "foo" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const updatedExamples = await client.upsertExamplesMultipart([ + example1Update, + ]); + expect(updatedExamples.count).toBe(1); + expect(updatedExamples.example_ids[0]).toBe(exampleId); + + const updatedExample = await client.readExample( + updatedExamples.example_ids[0] + ); + expect(updatedExample.inputs["text"]).toBe("bar baz"); + expect(updatedExample.outputs?.["response"]).toBe("foo"); + + // Test invalid example fails + const example3: ExampleUpsertWithAttachments = { + dataset_id: uuidv4(), // not a real dataset + inputs: { text: "foo bar" }, + outputs: { response: "baz" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const errorResponse = await client.upsertExamplesMultipart([example3]); + expect(errorResponse).toHaveProperty("error"); + + // Clean up + await client.deleteDataset({ datasetName }); +}); diff --git a/js/src/tests/client.test.ts b/js/src/tests/client.test.ts index 02871ba1a..d86c0dc24 100644 --- a/js/src/tests/client.test.ts +++ b/js/src/tests/client.test.ts @@ -1,6 +1,5 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { jest } from "@jest/globals"; -import { v4 as uuidv4 } from "uuid"; import { Client } from "../client.js"; import { getEnvironmentVariables, @@ -11,10 +10,6 @@ import { isVersionGreaterOrEqual, parsePromptIdentifier, } from "../utils/prompts.js"; -import { ExampleUpsertWithAttachments } from "../schemas.js"; -import * as fs from "node:fs"; -import * as path from "node:path"; -import { fileURLToPath } from "node:url"; describe("Client", () => { describe("createLLMExample", () => { @@ -254,121 +249,4 @@ describe("Client", () => { }); }); }); - - describe("upsertExamplesMultipart", () => { - it("should upsert examples with attachments via multipart endpoint", async () => { - const datasetName = `__test_upsert_examples_multipart${uuidv4().slice(0, 4)}`; - // NEED TO FIX THIS AFTER ENDPOINT MAKES IT TO PROD - const client = new Client({ - apiUrl: "https://dev.api.smith.langchain.com", - apiKey: "HARDCODE FOR TESTING" - }); - - // Clean up existing dataset if it exists - if (await client.hasDataset({ datasetName })) { - await client.deleteDataset({ datasetName }); - } - - // Create actual dataset - const dataset = await client.createDataset( - datasetName, { - description: "Test dataset for multipart example upload", - dataType: "kv" - } - ); - - const pathname = path.join( - path.dirname(fileURLToPath(import.meta.url)), - "test_data", - "parrot-icon.png" - ); - // Create test examples - const exampleId = uuidv4(); - const example1: ExampleUpsertWithAttachments = { - id: exampleId, - dataset_id: dataset.id, - inputs: { text: "hello world" }, - // check that passing no outputs works fine - attachments: { - test_file: ["image/png", fs.readFileSync(pathname)], - }, - }; - - const example2: ExampleUpsertWithAttachments = { - dataset_id: dataset.id, - inputs: { text: "foo bar" }, - outputs: { response: "baz" }, - attachments: { - my_file: ["image/png", fs.readFileSync(pathname)], - }, - }; - - // Test creating examples - const createdExamples = await client.upsertExamplesMultipart({ - upserts: [ - example1, - example2, - ]}); - - expect(createdExamples.count).toBe(2); - - const createdExample1 = await client.readExample( - createdExamples.example_ids[0] - ); - expect(createdExample1.inputs["text"]).toBe("hello world"); - - const createdExample2 = await client.readExample( - createdExamples.example_ids[1] - ); - expect(createdExample2.inputs["text"]).toBe("foo bar"); - expect(createdExample2.outputs?.["response"]).toBe("baz"); - - // Test examples were sent to correct dataset - const allExamplesInDataset = []; - for await (const example of client.listExamples({ - datasetId: dataset.id, - })) { - allExamplesInDataset.push(example); - } - expect(allExamplesInDataset.length).toBe(2); - - // Test updating example - const example1Update: ExampleUpsertWithAttachments = { - id: exampleId, - dataset_id: dataset.id, - inputs: { text: "bar baz" }, - outputs: { response: "foo" }, - attachments: { - my_file: ["image/png", fs.readFileSync(pathname)], - }, - }; - - const updatedExamples = await client.upsertExamplesMultipart({ - upserts: [ - example1Update, - ]}); - expect(updatedExamples.count).toBe(1); - expect(updatedExamples.example_ids[0]).toBe(exampleId); - - const updatedExample = await client.readExample(updatedExamples.example_ids[0]); - expect(updatedExample.inputs["text"]).toBe("bar baz"); - expect(updatedExample.outputs?.["response"]).toBe("foo"); - - // Test invalid example fails - const example3: ExampleUpsertWithAttachments = { - dataset_id: uuidv4(), // not a real dataset - inputs: { text: "foo bar" }, - outputs: { response: "baz" }, - attachments: { - my_file: ["image/png", fs.readFileSync(pathname)], - }, - }; - - const errorResponse = await client.upsertExamplesMultipart({ upserts: [example3] }); - expect(errorResponse).toHaveProperty("error"); - - // Clean up - await client.deleteDataset({ datasetName }); - }); - }); }); From f8a4ff4c724c66fd3947f18a56eb257a8d813aba Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 2 Dec 2024 10:38:26 -0800 Subject: [PATCH 03/23] update endpoint --- js/src/client.ts | 21 +++++++++++++-------- js/src/schemas.ts | 4 ++-- js/src/tests/client.int.test.ts | 16 ++++++++-------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 3d441ee03..ee999247e 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -35,8 +35,8 @@ import { AnnotationQueue, RunWithAnnotationQueueInfo, Attachments, - ExampleUpsertWithAttachments, - UpsertExamplesResponse, + ExampleUploadWithAttachments, + UploadExamplesResponse, } from "./schemas.js"; import { convertLangChainMessageToExample, @@ -3839,17 +3839,16 @@ export class Client { * @param upserts List of ExampleUpsertWithAttachments objects to upsert * @returns Promise with the upsert response */ - public async upsertExamplesMultipart( - upserts: ExampleUpsertWithAttachments[] = [] - ): Promise { + public async uploadExamplesMultipart( + uploads: ExampleUploadWithAttachments[] = [] + ): Promise { const formData = new FormData(); - for (const example of upserts) { + for (const example of uploads) { const exampleId = (example.id ?? uuid.v4()).toString(); // Prepare the main example body const exampleBody = { - dataset_id: example.dataset_id, created_at: example.created_at, ...(example.metadata && { metadata: example.metadata }), ...(example.split && { split: example.split }), @@ -3891,9 +3890,15 @@ export class Client { } } + const datasetIds = uploads.map((example) => example.dataset_id); + if (datasetIds.length > 1) { + throw new Error("Cannot upload examples to multiple datasets"); + } + const datasetId = datasetIds[0]; + const response = await this.caller.call( _getFetchImplementation(), - `${this.apiUrl}/v1/platform/examples/multipart`, + `${this.apiUrl}/v1/platform/datasets/${datasetId}/examples`, { method: "POST", headers: this.headers, diff --git a/js/src/schemas.ts b/js/src/schemas.ts index 90dfc5186..28524a36f 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -248,11 +248,11 @@ export interface ExampleCreate extends BaseExample { split?: string | string[]; } -export interface ExampleUpsertWithAttachments extends ExampleCreate { +export interface ExampleUploadWithAttachments extends ExampleCreate { attachments?: Attachments; } -export interface UpsertExamplesResponse { +export interface UploadExamplesResponse { count: number; example_ids: string[]; } diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index d13407781..0e806c240 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -7,7 +7,7 @@ import { import { Client } from "../client.js"; import { v4 as uuidv4 } from "uuid"; -import { ExampleUpsertWithAttachments } from "../schemas.js"; +import { ExampleUploadWithAttachments } from "../schemas.js"; import * as fs from "node:fs"; import * as path from "node:path"; import { fileURLToPath } from "node:url"; @@ -1267,7 +1267,7 @@ test("annotationqueue crud", async () => { ); // Create test examples const exampleId = uuidv4(); - const example1: ExampleUpsertWithAttachments = { + const example1: ExampleUploadWithAttachments = { id: exampleId, dataset_id: dataset.id, inputs: { text: "hello world" }, @@ -1277,7 +1277,7 @@ test("annotationqueue crud", async () => { }, }; - const example2: ExampleUpsertWithAttachments = { + const example2: ExampleUploadWithAttachments = { dataset_id: dataset.id, inputs: { text: "foo bar" }, outputs: { response: "baz" }, @@ -1287,7 +1287,7 @@ test("annotationqueue crud", async () => { }; // Test creating examples - const createdExamples = await client.upsertExamplesMultipart([ + const createdExamples = await client.uploadExamplesMultipart([ example1, example2, ]); @@ -1315,7 +1315,7 @@ test("annotationqueue crud", async () => { expect(allExamplesInDataset.length).toBe(2); // Test updating example - const example1Update: ExampleUpsertWithAttachments = { + const example1Update: ExampleUploadWithAttachments = { id: exampleId, dataset_id: dataset.id, inputs: { text: "bar baz" }, @@ -1325,7 +1325,7 @@ test("annotationqueue crud", async () => { }, }; - const updatedExamples = await client.upsertExamplesMultipart([ + const updatedExamples = await client.uploadExamplesMultipart([ example1Update, ]); expect(updatedExamples.count).toBe(1); @@ -1338,7 +1338,7 @@ test("annotationqueue crud", async () => { expect(updatedExample.outputs?.["response"]).toBe("foo"); // Test invalid example fails - const example3: ExampleUpsertWithAttachments = { + const example3: ExampleUploadWithAttachments = { dataset_id: uuidv4(), // not a real dataset inputs: { text: "foo bar" }, outputs: { response: "baz" }, @@ -1347,7 +1347,7 @@ test("annotationqueue crud", async () => { }, }; - const errorResponse = await client.upsertExamplesMultipart([example3]); + const errorResponse = await client.uploadExamplesMultipart([example3]); expect(errorResponse).toHaveProperty("error"); // Clean up From 922240fa49a2bc5ed77dbeaa20c84fed6e661c08 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 14:43:02 -0800 Subject: [PATCH 04/23] dataset id as param --- js/src/client.ts | 7 +------ js/src/schemas.ts | 8 +++++++- js/src/tests/client.int.test.ts | 10 +++------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 0ef635f10..95cbdcacd 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -3855,6 +3855,7 @@ export class Client implements LangSmithTracingClientInterface { * @returns Promise with the upsert response */ public async uploadExamplesMultipart( + datasetId: string, uploads: ExampleUploadWithAttachments[] = [] ): Promise { const formData = new FormData(); @@ -3905,12 +3906,6 @@ export class Client implements LangSmithTracingClientInterface { } } - const datasetIds = uploads.map((example) => example.dataset_id); - if (datasetIds.length > 1) { - throw new Error("Cannot upload examples to multiple datasets"); - } - const datasetId = datasetIds[0]; - const response = await this.caller.call( _getFetchImplementation(), `${this.apiUrl}/v1/platform/datasets/${datasetId}/examples`, diff --git a/js/src/schemas.ts b/js/src/schemas.ts index 28524a36f..d8f8dbba9 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -248,7 +248,13 @@ export interface ExampleCreate extends BaseExample { split?: string | string[]; } -export interface ExampleUploadWithAttachments extends ExampleCreate { +export interface ExampleUploadWithAttachments { + id?: string; + created_at?: string; + inputs: KVMap; + outputs?: KVMap; + metadata?: KVMap; + split?: string | string[]; attachments?: Attachments; } diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 0e806c240..dd6ddbaa0 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1269,7 +1269,6 @@ test("annotationqueue crud", async () => { const exampleId = uuidv4(); const example1: ExampleUploadWithAttachments = { id: exampleId, - dataset_id: dataset.id, inputs: { text: "hello world" }, // check that passing no outputs works fine attachments: { @@ -1278,7 +1277,6 @@ test("annotationqueue crud", async () => { }; const example2: ExampleUploadWithAttachments = { - dataset_id: dataset.id, inputs: { text: "foo bar" }, outputs: { response: "baz" }, attachments: { @@ -1287,7 +1285,7 @@ test("annotationqueue crud", async () => { }; // Test creating examples - const createdExamples = await client.uploadExamplesMultipart([ + const createdExamples = await client.uploadExamplesMultipart(dataset.id, [ example1, example2, ]); @@ -1317,7 +1315,6 @@ test("annotationqueue crud", async () => { // Test updating example const example1Update: ExampleUploadWithAttachments = { id: exampleId, - dataset_id: dataset.id, inputs: { text: "bar baz" }, outputs: { response: "foo" }, attachments: { @@ -1325,7 +1322,7 @@ test("annotationqueue crud", async () => { }, }; - const updatedExamples = await client.uploadExamplesMultipart([ + const updatedExamples = await client.uploadExamplesMultipart(dataset.id,[ example1Update, ]); expect(updatedExamples.count).toBe(1); @@ -1339,7 +1336,6 @@ test("annotationqueue crud", async () => { // Test invalid example fails const example3: ExampleUploadWithAttachments = { - dataset_id: uuidv4(), // not a real dataset inputs: { text: "foo bar" }, outputs: { response: "baz" }, attachments: { @@ -1347,7 +1343,7 @@ test("annotationqueue crud", async () => { }, }; - const errorResponse = await client.uploadExamplesMultipart([example3]); + const errorResponse = await client.uploadExamplesMultipart(uuidv4(), [example3]); expect(errorResponse).toHaveProperty("error"); // Clean up From cd8d93d5e442bdfd0c1f4981311a79086df20648 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 16:51:26 -0800 Subject: [PATCH 05/23] fmt --- js/src/tests/client.int.test.ts | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index dd6ddbaa0..45f883dc2 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1245,7 +1245,7 @@ test("annotationqueue crud", async () => { } }); -test("annotationqueue crud", async () => { +test("upload examples multipart", async () => { const client = new Client(); const datasetName = `__test_upsert_examples_multipart${uuidv4().slice(0, 4)}`; @@ -1312,28 +1312,6 @@ test("annotationqueue crud", async () => { } expect(allExamplesInDataset.length).toBe(2); - // Test updating example - const example1Update: ExampleUploadWithAttachments = { - id: exampleId, - inputs: { text: "bar baz" }, - outputs: { response: "foo" }, - attachments: { - my_file: ["image/png", fs.readFileSync(pathname)], - }, - }; - - const updatedExamples = await client.uploadExamplesMultipart(dataset.id,[ - example1Update, - ]); - expect(updatedExamples.count).toBe(1); - expect(updatedExamples.example_ids[0]).toBe(exampleId); - - const updatedExample = await client.readExample( - updatedExamples.example_ids[0] - ); - expect(updatedExample.inputs["text"]).toBe("bar baz"); - expect(updatedExample.outputs?.["response"]).toBe("foo"); - // Test invalid example fails const example3: ExampleUploadWithAttachments = { inputs: { text: "foo bar" }, @@ -1343,7 +1321,9 @@ test("annotationqueue crud", async () => { }, }; - const errorResponse = await client.uploadExamplesMultipart(uuidv4(), [example3]); + const errorResponse = await client.uploadExamplesMultipart(uuidv4(), [ + example3, + ]); expect(errorResponse).toHaveProperty("error"); // Clean up From 71cb320abac083571737c45f0902cedfc8d7abbc Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 17:43:53 -0800 Subject: [PATCH 06/23] fmt --- js/src/client.ts | 6 +++--- js/src/tests/client.int.test.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 95cbdcacd..fb67bc011 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -3850,9 +3850,9 @@ export class Client implements LangSmithTracingClientInterface { } /** - * Upsert examples with attachments using multipart form data. - * @param upserts List of ExampleUpsertWithAttachments objects to upsert - * @returns Promise with the upsert response + * Upload examples with attachments using multipart form data. + * @param uploads List of ExampleUploadWithAttachments objects to upload + * @returns Promise with the upload response */ public async uploadExamplesMultipart( datasetId: string, diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 45f883dc2..6602bff80 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1247,7 +1247,7 @@ test("annotationqueue crud", async () => { test("upload examples multipart", async () => { const client = new Client(); - const datasetName = `__test_upsert_examples_multipart${uuidv4().slice(0, 4)}`; + const datasetName = `__test_upload_examples_multipart${uuidv4().slice(0, 4)}`; // Clean up existing dataset if it exists if (await client.hasDataset({ datasetName })) { From 580d061f21464c51a849af0af8fbfbecdf55a07a Mon Sep 17 00:00:00 2001 From: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:24:23 -0800 Subject: [PATCH 07/23] [js] update examples multipart (#1317) --- js/src/client.ts | 153 +++++++++++++++++++++++++++++++- js/src/schemas.ts | 32 +++++++ js/src/tests/client.int.test.ts | 138 +++++++++++++++++++++++++++- 3 files changed, 319 insertions(+), 4 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index fb67bc011..dc75eab07 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -37,6 +37,10 @@ import { Attachments, ExampleUploadWithAttachments, UploadExamplesResponse, + ExampleUpdateWithAttachments, + UpdateExamplesResponse, + RawExample, + AttachmentInfo, } from "./schemas.js"; import { convertLangChainMessageToExample, @@ -2717,7 +2721,34 @@ export class Client implements LangSmithTracingClientInterface { public async readExample(exampleId: string): Promise { assertUuid(exampleId); const path = `/examples/${exampleId}`; - return await this._get(path); + const rawExample: RawExample = await this._get(path); + const { attachment_urls, ...rest } = rawExample; + const example: Example = rest; + if (attachment_urls) { + const attachmentsArray = await Promise.all( + Object.entries(attachment_urls).map(async ([key, value]) => { + return { + key, + value: { + presigned_url: value.presigned_url, + reader: await fetch(value.presigned_url).then((response) => + response.arrayBuffer() + ), + }, + }; + }) + ); + example.attachments = attachmentsArray.reduce((acc, { key, value }) => { + acc[key.startsWith("attachment.") ? key.slice(11) : key] = value; + return acc; + }, {} as Record); + } + if (rawExample.metadata?.dataset_split) { + const { dataset_split, ...metadata } = rawExample.metadata; + example.split = dataset_split; + example.metadata = metadata; + } + return example; } public async *listExamples({ @@ -2789,11 +2820,40 @@ export class Client implements LangSmithTracingClientInterface { params.append("filter", filter); } let i = 0; - for await (const examples of this._getPaginated( + for await (const rawExamples of this._getPaginated( "/examples", params )) { - for (const example of examples) { + for (const rawExample of rawExamples) { + const { attachment_urls, ...rest } = rawExample; + const example: Example = rest; + if (attachment_urls) { + const attachmentsArray = await Promise.all( + Object.entries(attachment_urls).map(async ([key, value]) => { + return { + key, + value: { + presigned_url: value.presigned_url, + reader: await fetch(value.presigned_url).then((response) => + response.arrayBuffer() + ), + }, + }; + }) + ); + example.attachments = attachmentsArray.reduce( + (acc, { key, value }) => { + acc[key.startsWith("attachment.") ? key.slice(11) : key] = value; + return acc; + }, + {} as Record + ); + } + if (rawExample.metadata?.dataset_split) { + const { dataset_split, ...metadata } = rawExample.metadata; + example.split = dataset_split; + example.metadata = metadata; + } yield example; i++; } @@ -3849,6 +3909,93 @@ export class Client implements LangSmithTracingClientInterface { ); } + /** + * Update examples with attachments using multipart form data. + * @param updates List of ExampleUpdateWithAttachments objects to upsert + * @returns Promise with the update response + */ + public async updateExamplesMultipart( + datasetId: string, + updates: ExampleUpdateWithAttachments[] = [] + ): Promise { + const formData = new FormData(); + + for (const example of updates) { + const exampleId = example.id; + + // Prepare the main example body + const exampleBody = { + ...(example.metadata && { metadata: example.metadata }), + ...(example.split && { split: example.split }), + }; + + // Add main example data + const stringifiedExample = stringifyForTracing(exampleBody); + const exampleBlob = new Blob([stringifiedExample], { + type: "application/json", + }); + formData.append(exampleId, exampleBlob); + + // Add inputs + if (example.inputs) { + const stringifiedInputs = stringifyForTracing(example.inputs); + const inputsBlob = new Blob([stringifiedInputs], { + type: "application/json", + }); + formData.append(`${exampleId}.inputs`, inputsBlob); + } + + // Add outputs if present + if (example.outputs) { + const stringifiedOutputs = stringifyForTracing(example.outputs); + const outputsBlob = new Blob([stringifiedOutputs], { + type: "application/json", + }); + formData.append(`${exampleId}.outputs`, outputsBlob); + } + + // Add attachments if present + if (example.attachments) { + for (const [name, [mimeType, data]] of Object.entries( + example.attachments + )) { + const attachmentBlob = new Blob([data], { + type: `${mimeType}; length=${data.byteLength}`, + }); + formData.append(`${exampleId}.attachment.${name}`, attachmentBlob); + } + } + + if (example.attachments_operations) { + const stringifiedAttachmentsOperations = stringifyForTracing( + example.attachments_operations + ); + const attachmentsOperationsBlob = new Blob( + [stringifiedAttachmentsOperations], + { + type: "application/json", + } + ); + formData.append( + `${exampleId}.attachments_operations`, + attachmentsOperationsBlob + ); + } + } + + const response = await this.caller.call( + _getFetchImplementation(), + `${this.apiUrl}/v1/platform/datasets/${datasetId}/examples`, + { + method: "PATCH", + headers: this.headers, + body: formData, + } + ); + const result = await response.json(); + return result; + } + /** * Upload examples with attachments using multipart form data. * @param uploads List of ExampleUploadWithAttachments objects to upload diff --git a/js/src/schemas.ts b/js/src/schemas.ts index d8f8dbba9..36ad11211 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -63,6 +63,11 @@ export interface BaseExample { source_run_id?: string; } +export interface AttachmentInfo { + presigned_url: string; + reader: Uint8Array | ArrayBuffer; +} + export type AttachmentData = Uint8Array | ArrayBuffer; export type Attachments = Record; @@ -258,17 +263,44 @@ export interface ExampleUploadWithAttachments { attachments?: Attachments; } +export interface ExampleUpdateWithAttachments { + id: string; + inputs?: KVMap; + outputs?: KVMap; + metadata?: KVMap; + split?: string | string[]; + attachments?: Attachments; + attachments_operations?: KVMap; +} + export interface UploadExamplesResponse { count: number; example_ids: string[]; } +export interface UpdateExamplesResponse extends UploadExamplesResponse {} + export interface Example extends BaseExample { id: string; created_at: string; modified_at: string; source_run_id?: string; runs: Run[]; + attachments?: Record; + split?: string | string[]; +} + +interface RawAttachmentInfo { + presigned_url: string; + s3_url: string; +} +export interface RawExample extends BaseExample { + id: string; + created_at: string; + modified_at: string; + source_run_id?: string; + runs: Run[]; + attachment_urls?: Record; } export interface ExampleUpdate { diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 6602bff80..453848768 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1,4 +1,10 @@ -import { Dataset, Example, Run, TracerSession } from "../schemas.js"; +import { + Dataset, + Example, + ExampleUpdateWithAttachments, + Run, + TracerSession, +} from "../schemas.js"; import { FunctionMessage, HumanMessage, @@ -1329,3 +1335,133 @@ test("upload examples multipart", async () => { // Clean up await client.deleteDataset({ datasetName }); }); + +test("update examples multipart", async () => { + const client = new Client({ + apiKey: "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", + }); + const datasetName = `__test_update_examples_multipart${uuidv4().slice(0, 4)}`; + + // Clean up existing dataset if it exists + if (await client.hasDataset({ datasetName })) { + await client.deleteDataset({ datasetName }); + } + + // Create actual dataset + const dataset = await client.createDataset(datasetName, { + description: "Test dataset for multipart example upload", + dataType: "kv", + }); + + const pathname = path.join( + path.dirname(fileURLToPath(import.meta.url)), + "test_data", + "parrot-icon.png" + ); + // Create test examples + const exampleId = uuidv4(); + const example: ExampleUploadWithAttachments = { + id: exampleId, + metadata: { bar: "foo" }, + inputs: { text: "hello world" }, + // check that passing no outputs works fine + attachments: { + test_file: ["image/png", fs.readFileSync(pathname)], + foo: ["image/png", fs.readFileSync(pathname)], + }, + }; + + // Create examples + await client.uploadExamplesMultipart(dataset.id, [example]); + + const exampleUpdate1: ExampleUpdateWithAttachments = { + id: exampleId, + inputs: { text: "hello world" }, + attachments_operations: { + retain: ["test_file"], + rename: { foo: "test_file" }, + }, + }; + + let response = await client.updateExamplesMultipart(dataset.id, [ + exampleUpdate1, + ]); + expect(response).toHaveProperty("error"); + + const exampleUpdate2: ExampleUpdateWithAttachments = { + id: exampleId, + inputs: { text: "hello world" }, + attachments_operations: { + retain: ["test_file"], + rename: { test_file: "test_file2" }, + }, + }; + + response = await client.updateExamplesMultipart(dataset.id, [exampleUpdate2]); + expect(response).toHaveProperty("error"); + + const exampleUpdate3: ExampleUpdateWithAttachments = { + id: exampleId, + inputs: { text: "hello world2" }, + attachments_operations: { + retain: ["test_file"], + rename: { foo: "bar" }, + }, + }; + + await client.updateExamplesMultipart(dataset.id, [exampleUpdate3]); + + let updatedExample = await client.readExample(exampleId); + expect(updatedExample.inputs.text).toEqual("hello world2"); + expect(Object.keys(updatedExample.attachments ?? {}).sort()).toEqual( + ["bar", "test_file"].sort() + ); + expect(updatedExample.metadata).toEqual({ bar: "foo" }); + let attachmentData = new TextDecoder().decode( + await updatedExample.attachments?.test_file.reader + ); + expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); + attachmentData = new TextDecoder().decode( + await updatedExample.attachments?.bar.reader + ); + expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); + + const exampleUpdate4: ExampleUpdateWithAttachments = { + id: exampleId, + metadata: { foo: "bar" }, + attachments: { + test_file2: ["image/png", fs.readFileSync(pathname)], + }, + }; + + await client.updateExamplesMultipart(dataset.id, [exampleUpdate4]); + updatedExample = await client.readExample(exampleId); + expect(updatedExample.metadata).toEqual({ foo: "bar" }); + expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file2"]); + attachmentData = new TextDecoder().decode( + await updatedExample.attachments?.test_file2.reader + ); + expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); + + const exampleUpdate5: ExampleUpdateWithAttachments = { + id: exampleId, + split: ["foo", "bar"], + attachments: { + test_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + await client.updateExamplesMultipart(dataset.id, [exampleUpdate5]); + + updatedExample = await client.readExample(exampleId); + expect(updatedExample.metadata).toEqual({ foo: "bar" }); + expect(updatedExample.split).toEqual(["foo", "bar"]); + expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file"]); + attachmentData = new TextDecoder().decode( + await updatedExample.attachments?.test_file.reader + ); + expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); + + // Clean up + await client.deleteDataset({ datasetName }); +}); From 298058b2b68304fedd97fbc6ba80e530b0339626 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 11:04:18 -0800 Subject: [PATCH 08/23] fix --- js/src/client.ts | 22 +++++++++---- js/src/schemas.ts | 2 +- js/src/tests/client.int.test.ts | 56 +++++++++++++++++++++++---------- 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index dc75eab07..28c9bf751 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -2731,15 +2731,20 @@ export class Client implements LangSmithTracingClientInterface { key, value: { presigned_url: value.presigned_url, - reader: await fetch(value.presigned_url).then((response) => - response.arrayBuffer() + reader: await fetch(value.presigned_url).then( + (response) => response.body ), }, }; }) ); example.attachments = attachmentsArray.reduce((acc, { key, value }) => { - acc[key.startsWith("attachment.") ? key.slice(11) : key] = value; + if (value.reader != null) { + acc[key.startsWith("attachment.") ? key.slice(11) : key] = { + ...value, + reader: value.reader, + }; + } return acc; }, {} as Record); } @@ -2834,8 +2839,8 @@ export class Client implements LangSmithTracingClientInterface { key, value: { presigned_url: value.presigned_url, - reader: await fetch(value.presigned_url).then((response) => - response.arrayBuffer() + reader: await fetch(value.presigned_url).then( + (response) => response.body ), }, }; @@ -2843,7 +2848,12 @@ export class Client implements LangSmithTracingClientInterface { ); example.attachments = attachmentsArray.reduce( (acc, { key, value }) => { - acc[key.startsWith("attachment.") ? key.slice(11) : key] = value; + if (value.reader != null) { + acc[key.startsWith("attachment.") ? key.slice(11) : key] = { + ...value, + reader: value.reader, + }; + } return acc; }, {} as Record diff --git a/js/src/schemas.ts b/js/src/schemas.ts index 36ad11211..44e7866a8 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -65,7 +65,7 @@ export interface BaseExample { export interface AttachmentInfo { presigned_url: string; - reader: Uint8Array | ArrayBuffer; + reader: ReadableStream; } export type AttachmentData = Uint8Array | ArrayBuffer; diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 453848768..3112a6e86 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1336,6 +1336,30 @@ test("upload examples multipart", async () => { await client.deleteDataset({ datasetName }); }); +async function readFromStream(reader: ReadableStream) { + const streamReader = reader.getReader(); + const chunks: Uint8Array[] = []; + + let done = false; + while (!done) { + const { done: doneInner, value } = await streamReader.read(); + done = doneInner; + if (value) chunks.push(value); + } + + // Combine chunks into a single Uint8Array if needed + const fullData = new Uint8Array( + chunks.reduce((acc, chunk) => acc + chunk.length, 0) + ); + let offset = 0; + for (const chunk of chunks) { + fullData.set(chunk, offset); + offset += chunk.length; + } + + return fullData; +} + test("update examples multipart", async () => { const client = new Client({ apiKey: "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", @@ -1417,14 +1441,14 @@ test("update examples multipart", async () => { ["bar", "test_file"].sort() ); expect(updatedExample.metadata).toEqual({ bar: "foo" }); - let attachmentData = new TextDecoder().decode( - await updatedExample.attachments?.test_file.reader - ); - expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); - attachmentData = new TextDecoder().decode( - await updatedExample.attachments?.bar.reader - ); - expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); + let attachmentData: Uint8Array | undefined = + updatedExample.attachments?.test_file.reader && + (await readFromStream(updatedExample.attachments?.test_file.reader)); + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); + attachmentData = + updatedExample.attachments?.bar.reader && + (await readFromStream(updatedExample.attachments?.bar.reader)); + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); const exampleUpdate4: ExampleUpdateWithAttachments = { id: exampleId, @@ -1438,10 +1462,10 @@ test("update examples multipart", async () => { updatedExample = await client.readExample(exampleId); expect(updatedExample.metadata).toEqual({ foo: "bar" }); expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file2"]); - attachmentData = new TextDecoder().decode( - await updatedExample.attachments?.test_file2.reader - ); - expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); + attachmentData = + updatedExample.attachments?.test_file2.reader && + (await readFromStream(updatedExample.attachments?.test_file2.reader)); + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); const exampleUpdate5: ExampleUpdateWithAttachments = { id: exampleId, @@ -1457,10 +1481,10 @@ test("update examples multipart", async () => { expect(updatedExample.metadata).toEqual({ foo: "bar" }); expect(updatedExample.split).toEqual(["foo", "bar"]); expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file"]); - attachmentData = new TextDecoder().decode( - await updatedExample.attachments?.test_file.reader - ); - expect(attachmentData).toEqual(fs.readFileSync(pathname).toString()); + attachmentData = + updatedExample.attachments?.test_file.reader && + (await readFromStream(updatedExample.attachments?.test_file.reader)); + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); // Clean up await client.deleteDataset({ datasetName }); From 8f6bf08eb5e53505b5f7e1457e253885dd983e45 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 12:44:52 -0800 Subject: [PATCH 09/23] change stream --- js/src/client.ts | 17 +++++++++++------ js/src/schemas.ts | 4 +++- js/src/tests/client.int.test.ts | 25 +++++-------------------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 28c9bf751..1790c364e 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -66,6 +66,7 @@ import { raiseForStatus } from "./utils/error.js"; import { _getFetchImplementation } from "./singletons/fetch.js"; import { stringify as stringifyForTracing } from "./utils/fast-safe-stringify/index.js"; +import { IterableReadableStream } from "./utils/stream.js"; export interface ClientConfig { apiUrl?: string; @@ -2727,13 +2728,15 @@ export class Client implements LangSmithTracingClientInterface { if (attachment_urls) { const attachmentsArray = await Promise.all( Object.entries(attachment_urls).map(async ([key, value]) => { + async function* fetchReader() { + const response = await fetch(value.presigned_url); + yield *IterableReadableStream.fromReadableStream(response.body!); + } return { key, value: { presigned_url: value.presigned_url, - reader: await fetch(value.presigned_url).then( - (response) => response.body - ), + reader: IterableReadableStream.fromAsyncGenerator(fetchReader()), }, }; }) @@ -2835,13 +2838,15 @@ export class Client implements LangSmithTracingClientInterface { if (attachment_urls) { const attachmentsArray = await Promise.all( Object.entries(attachment_urls).map(async ([key, value]) => { + async function* fetchReader() { + const response = await fetch(value.presigned_url); + yield *IterableReadableStream.fromReadableStream(response.body!); + } return { key, value: { presigned_url: value.presigned_url, - reader: await fetch(value.presigned_url).then( - (response) => response.body - ), + reader: IterableReadableStream.fromAsyncGenerator(fetchReader()), }, }; }) diff --git a/js/src/schemas.ts b/js/src/schemas.ts index 44e7866a8..11f5a2ac5 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -1,3 +1,5 @@ +import { IterableReadableStream } from "./utils/stream.js"; + export interface TracerSession { // The ID of the tenant, or organization tenant_id: string; @@ -65,7 +67,7 @@ export interface BaseExample { export interface AttachmentInfo { presigned_url: string; - reader: ReadableStream; + reader: IterableReadableStream; } export type AttachmentData = Uint8Array | ArrayBuffer; diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 3112a6e86..e9c5efd82 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -29,6 +29,7 @@ import { ChatOpenAI } from "@langchain/openai"; import { RunnableSequence } from "@langchain/core/runnables"; import { load } from "langchain/load"; import { _getFetchImplementation } from "../singletons/fetch.js"; +import { IterableReadableStream } from "../utils/stream.js"; type CheckOutputsType = boolean | ((run: Run) => boolean); async function waitUntilRunFound( @@ -1336,28 +1337,12 @@ test("upload examples multipart", async () => { await client.deleteDataset({ datasetName }); }); -async function readFromStream(reader: ReadableStream) { - const streamReader = reader.getReader(); +async function readFromStream(reader: IterableReadableStream) { const chunks: Uint8Array[] = []; - - let done = false; - while (!done) { - const { done: doneInner, value } = await streamReader.read(); - done = doneInner; - if (value) chunks.push(value); + for await (const chunk of reader) { + chunks.push(chunk); } - - // Combine chunks into a single Uint8Array if needed - const fullData = new Uint8Array( - chunks.reduce((acc, chunk) => acc + chunk.length, 0) - ); - let offset = 0; - for (const chunk of chunks) { - fullData.set(chunk, offset); - offset += chunk.length; - } - - return fullData; + return new Uint8Array(Buffer.concat(chunks)); } test("update examples multipart", async () => { From 4c5dd6c21d53aef82daff25749f8c323b1635e68 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 12:46:11 -0800 Subject: [PATCH 10/23] x --- js/src/types/stream.ts | 5 ++ js/src/utils/stream.ts | 118 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 js/src/types/stream.ts create mode 100644 js/src/utils/stream.ts diff --git a/js/src/types/stream.ts b/js/src/types/stream.ts new file mode 100644 index 000000000..17fefd501 --- /dev/null +++ b/js/src/types/stream.ts @@ -0,0 +1,5 @@ +// Make this a type to override ReadableStream's async iterator type in case +// the popular web-streams-polyfill is imported - the supplied types +// in that case don't quite match. +export type IterableReadableStreamInterface = ReadableStream & + AsyncIterable; \ No newline at end of file diff --git a/js/src/utils/stream.ts b/js/src/utils/stream.ts new file mode 100644 index 000000000..2a57af5c0 --- /dev/null +++ b/js/src/utils/stream.ts @@ -0,0 +1,118 @@ +import type { IterableReadableStreamInterface } from "../types/stream.js" + +// Re-exported for backwards compatibility +// Do NOT import this type from this file inside the project. Instead, always import from `types/stream.js` +export type { IterableReadableStreamInterface }; + +/* + * Support async iterator syntax for ReadableStreams in all environments. + * Source: https://github.com/MattiasBuelens/web-streams-polyfill/pull/122#issuecomment-1627354490 + */ +export class IterableReadableStream + extends ReadableStream + implements IterableReadableStreamInterface +{ + public reader: ReadableStreamDefaultReader; + + ensureReader() { + if (!this.reader) { + this.reader = this.getReader(); + } + } + + async next(): Promise> { + this.ensureReader(); + try { + const result = await this.reader.read(); + if (result.done) { + this.reader.releaseLock(); // release lock when stream becomes closed + return { + done: true, + value: undefined, + }; + } else { + return { + done: false, + value: result.value, + }; + } + } catch (e) { + this.reader.releaseLock(); // release lock when stream becomes errored + throw e; + } + } + + async return(): Promise> { + this.ensureReader(); + // If wrapped in a Node stream, cancel is already called. + if (this.locked) { + const cancelPromise = this.reader.cancel(); // cancel first, but don't await yet + this.reader.releaseLock(); // release lock first + await cancelPromise; // now await it + } + return { done: true, value: undefined }; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + async throw(e: any): Promise> { + this.ensureReader(); + if (this.locked) { + const cancelPromise = this.reader.cancel(); // cancel first, but don't await yet + this.reader.releaseLock(); // release lock first + await cancelPromise; // now await it + } + throw e; + } + + [Symbol.asyncIterator]() { + return this; + } + + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore Not present in Node 18 types, required in latest Node 22 + async [Symbol.asyncDispose]() { + await this.return(); + } + + static fromReadableStream(stream: ReadableStream) { + // From https://developer.mozilla.org/en-US/docs/Web/API/Streams_API/Using_readable_streams#reading_the_stream + const reader = stream.getReader(); + return new IterableReadableStream({ + start(controller) { + return pump(); + function pump(): Promise { + return reader.read().then(({ done, value }) => { + // When no more data needs to be consumed, close the stream + if (done) { + controller.close(); + return; + } + // Enqueue the next data chunk into our target stream + controller.enqueue(value); + return pump(); + }); + } + }, + cancel() { + reader.releaseLock(); + }, + }); + } + + static fromAsyncGenerator(generator: AsyncGenerator) { + return new IterableReadableStream({ + async pull(controller) { + const { value, done } = await generator.next(); + // When no more data needs to be consumed, close the stream + if (done) { + controller.close(); + } + // Fix: `else if (value)` will hang the streaming when nullish value (e.g. empty string) is pulled + controller.enqueue(value); + }, + async cancel(reason) { + await generator.return(reason); + }, + }); + } +} \ No newline at end of file From dbfd754071489eff405f8077518e88db2f0051ba Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 15:55:41 -0800 Subject: [PATCH 11/23] x --- js/src/tests/client.int.test.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index e9c5efd82..28491d634 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1346,9 +1346,7 @@ async function readFromStream(reader: IterableReadableStream) { } test("update examples multipart", async () => { - const client = new Client({ - apiKey: "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", - }); + const client = new Client(); const datasetName = `__test_update_examples_multipart${uuidv4().slice(0, 4)}`; // Clean up existing dataset if it exists From 28f91793f302e7747589616dbb23d74b038fa3e2 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 16:10:06 -0800 Subject: [PATCH 12/23] x --- js/src/client.ts | 12 +----------- js/src/tests/client.int.test.ts | 3 +-- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 1790c364e..c6db15305 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -59,7 +59,7 @@ import { RunEvaluator, } from "./evaluation/evaluator.js"; import { __version__ } from "./index.js"; -import { assertUuid } from "./utils/_uuid.js"; +import { assertUuid } from "./utils/uuid.js"; import { warnOnce } from "./utils/warn.js"; import { parsePromptIdentifier } from "./utils/prompts.js"; import { raiseForStatus } from "./utils/error.js"; @@ -2751,11 +2751,6 @@ export class Client implements LangSmithTracingClientInterface { return acc; }, {} as Record); } - if (rawExample.metadata?.dataset_split) { - const { dataset_split, ...metadata } = rawExample.metadata; - example.split = dataset_split; - example.metadata = metadata; - } return example; } @@ -2864,11 +2859,6 @@ export class Client implements LangSmithTracingClientInterface { {} as Record ); } - if (rawExample.metadata?.dataset_split) { - const { dataset_split, ...metadata } = rawExample.metadata; - example.split = dataset_split; - example.metadata = metadata; - } yield example; i++; } diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 28491d634..53b224610 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1461,8 +1461,7 @@ test("update examples multipart", async () => { await client.updateExamplesMultipart(dataset.id, [exampleUpdate5]); updatedExample = await client.readExample(exampleId); - expect(updatedExample.metadata).toEqual({ foo: "bar" }); - expect(updatedExample.split).toEqual(["foo", "bar"]); + expect(updatedExample.metadata).toEqual({ foo: "bar", dataset_split: ["foo", "bar"] }); expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file"]); attachmentData = updatedExample.attachments?.test_file.reader && From 219f6e7a18caa11f4ae0a03a4ce31ba8bd7d7483 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 16:12:22 -0800 Subject: [PATCH 13/23] fmt --- js/src/client.ts | 12 ++++++++---- js/src/evaluation/_runner.ts | 2 +- js/src/tests/client.int.test.ts | 5 ++++- js/src/types/stream.ts | 2 +- js/src/utils/stream.ts | 4 ++-- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index c6db15305..d2b86e78a 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -59,7 +59,7 @@ import { RunEvaluator, } from "./evaluation/evaluator.js"; import { __version__ } from "./index.js"; -import { assertUuid } from "./utils/uuid.js"; +import { assertUuid } from "./utils/_uuid.js"; import { warnOnce } from "./utils/warn.js"; import { parsePromptIdentifier } from "./utils/prompts.js"; import { raiseForStatus } from "./utils/error.js"; @@ -2730,7 +2730,7 @@ export class Client implements LangSmithTracingClientInterface { Object.entries(attachment_urls).map(async ([key, value]) => { async function* fetchReader() { const response = await fetch(value.presigned_url); - yield *IterableReadableStream.fromReadableStream(response.body!); + yield* IterableReadableStream.fromReadableStream(response.body!); } return { key, @@ -2835,13 +2835,17 @@ export class Client implements LangSmithTracingClientInterface { Object.entries(attachment_urls).map(async ([key, value]) => { async function* fetchReader() { const response = await fetch(value.presigned_url); - yield *IterableReadableStream.fromReadableStream(response.body!); + yield* IterableReadableStream.fromReadableStream( + response.body! + ); } return { key, value: { presigned_url: value.presigned_url, - reader: IterableReadableStream.fromAsyncGenerator(fetchReader()), + reader: IterableReadableStream.fromAsyncGenerator( + fetchReader() + ), }, }; }) diff --git a/js/src/evaluation/_runner.ts b/js/src/evaluation/_runner.ts index cac6b5f4c..caf27aee7 100644 --- a/js/src/evaluation/_runner.ts +++ b/js/src/evaluation/_runner.ts @@ -2,7 +2,7 @@ import { Client, RunTree, RunTreeConfig } from "../index.js"; import { BaseRun, Example, KVMap, Run, TracerSession } from "../schemas.js"; import { traceable } from "../traceable.js"; import { getDefaultRevisionId, getGitInfo } from "../utils/_git.js"; -import { assertUuid } from "../utils/_uuid.js"; +import { assertUuid } from "../utils/uuid.js"; import { AsyncCaller } from "../utils/async_caller.js"; import { atee } from "../utils/atee.js"; import { getLangChainEnvVarsMetadata } from "../utils/env.js"; diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 53b224610..f145b304d 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1461,7 +1461,10 @@ test("update examples multipart", async () => { await client.updateExamplesMultipart(dataset.id, [exampleUpdate5]); updatedExample = await client.readExample(exampleId); - expect(updatedExample.metadata).toEqual({ foo: "bar", dataset_split: ["foo", "bar"] }); + expect(updatedExample.metadata).toEqual({ + foo: "bar", + dataset_split: ["foo", "bar"], + }); expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file"]); attachmentData = updatedExample.attachments?.test_file.reader && diff --git a/js/src/types/stream.ts b/js/src/types/stream.ts index 17fefd501..ae03b69b7 100644 --- a/js/src/types/stream.ts +++ b/js/src/types/stream.ts @@ -2,4 +2,4 @@ // the popular web-streams-polyfill is imported - the supplied types // in that case don't quite match. export type IterableReadableStreamInterface = ReadableStream & - AsyncIterable; \ No newline at end of file + AsyncIterable; diff --git a/js/src/utils/stream.ts b/js/src/utils/stream.ts index 2a57af5c0..121ce48a3 100644 --- a/js/src/utils/stream.ts +++ b/js/src/utils/stream.ts @@ -1,4 +1,4 @@ -import type { IterableReadableStreamInterface } from "../types/stream.js" +import type { IterableReadableStreamInterface } from "../types/stream.js"; // Re-exported for backwards compatibility // Do NOT import this type from this file inside the project. Instead, always import from `types/stream.js` @@ -115,4 +115,4 @@ export class IterableReadableStream }, }); } -} \ No newline at end of file +} From 2a4d3e4cff7b54beb14dc7278de749cc9501658b Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 16:13:55 -0800 Subject: [PATCH 14/23] fmt --- js/src/evaluation/_runner.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/evaluation/_runner.ts b/js/src/evaluation/_runner.ts index caf27aee7..cac6b5f4c 100644 --- a/js/src/evaluation/_runner.ts +++ b/js/src/evaluation/_runner.ts @@ -2,7 +2,7 @@ import { Client, RunTree, RunTreeConfig } from "../index.js"; import { BaseRun, Example, KVMap, Run, TracerSession } from "../schemas.js"; import { traceable } from "../traceable.js"; import { getDefaultRevisionId, getGitInfo } from "../utils/_git.js"; -import { assertUuid } from "../utils/uuid.js"; +import { assertUuid } from "../utils/_uuid.js"; import { AsyncCaller } from "../utils/async_caller.js"; import { atee } from "../utils/atee.js"; import { getLangChainEnvVarsMetadata } from "../utils/env.js"; From 04aff970696a0505737b11b3b5aeabe942af39ba Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 16:19:20 -0800 Subject: [PATCH 15/23] x --- js/src/client.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index d2b86e78a..f91b9c82a 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -2743,7 +2743,7 @@ export class Client implements LangSmithTracingClientInterface { ); example.attachments = attachmentsArray.reduce((acc, { key, value }) => { if (value.reader != null) { - acc[key.startsWith("attachment.") ? key.slice(11) : key] = { + acc[key.startsWith("attachment.") ? key.slice("attachment.".length) : key] = { ...value, reader: value.reader, }; @@ -2853,7 +2853,7 @@ export class Client implements LangSmithTracingClientInterface { example.attachments = attachmentsArray.reduce( (acc, { key, value }) => { if (value.reader != null) { - acc[key.startsWith("attachment.") ? key.slice(11) : key] = { + acc[key.startsWith("attachment.") ? key.slice("attachment.".length) : key] = { ...value, reader: value.reader, }; From 630e27fd143edef5e07e81a390652298348f74d2 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 16:26:45 -0800 Subject: [PATCH 16/23] fmt --- js/src/client.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index f91b9c82a..221fc1ffa 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -2743,7 +2743,11 @@ export class Client implements LangSmithTracingClientInterface { ); example.attachments = attachmentsArray.reduce((acc, { key, value }) => { if (value.reader != null) { - acc[key.startsWith("attachment.") ? key.slice("attachment.".length) : key] = { + acc[ + key.startsWith("attachment.") + ? key.slice("attachment.".length) + : key + ] = { ...value, reader: value.reader, }; @@ -2853,7 +2857,11 @@ export class Client implements LangSmithTracingClientInterface { example.attachments = attachmentsArray.reduce( (acc, { key, value }) => { if (value.reader != null) { - acc[key.startsWith("attachment.") ? key.slice("attachment.".length) : key] = { + acc[ + key.startsWith("attachment.") + ? key.slice("attachment.".length) + : key + ] = { ...value, reader: value.reader, }; From e2761baea2d54f12b84e2b00827bf66eced25fdc Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 17:37:43 -0800 Subject: [PATCH 17/23] x --- js/src/client.ts | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 221fc1ffa..5725d9acd 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -767,6 +767,11 @@ export class Client implements LangSmithTracingClientInterface { ); } + private async _getMultiPartSupport(): Promise { + const serverInfo = await this._ensureServerInfo(); + return serverInfo.instance_flags?.dataset_examples_multipart_enabled ?? false; + } + private drainAutoBatchQueue(batchSizeLimit: number) { while (this.autoBatchQueue.items.length > 0) { const [batch, done] = this.autoBatchQueue.pop(batchSizeLimit); @@ -2726,8 +2731,7 @@ export class Client implements LangSmithTracingClientInterface { const { attachment_urls, ...rest } = rawExample; const example: Example = rest; if (attachment_urls) { - const attachmentsArray = await Promise.all( - Object.entries(attachment_urls).map(async ([key, value]) => { + const attachmentsArray = Object.entries(attachment_urls).map(([key, value]) => { async function* fetchReader() { const response = await fetch(value.presigned_url); yield* IterableReadableStream.fromReadableStream(response.body!); @@ -2739,8 +2743,8 @@ export class Client implements LangSmithTracingClientInterface { reader: IterableReadableStream.fromAsyncGenerator(fetchReader()), }, }; - }) - ); + }); + // add attachments back to the example example.attachments = attachmentsArray.reduce((acc, { key, value }) => { if (value.reader != null) { acc[ @@ -2835,8 +2839,7 @@ export class Client implements LangSmithTracingClientInterface { const { attachment_urls, ...rest } = rawExample; const example: Example = rest; if (attachment_urls) { - const attachmentsArray = await Promise.all( - Object.entries(attachment_urls).map(async ([key, value]) => { + const attachmentsArray = Object.entries(attachment_urls).map(([key, value]) => { async function* fetchReader() { const response = await fetch(value.presigned_url); yield* IterableReadableStream.fromReadableStream( @@ -2852,8 +2855,8 @@ export class Client implements LangSmithTracingClientInterface { ), }, }; - }) - ); + }); + // add attachments back to the example example.attachments = attachmentsArray.reduce( (acc, { key, value }) => { if (value.reader != null) { @@ -3935,6 +3938,11 @@ export class Client implements LangSmithTracingClientInterface { datasetId: string, updates: ExampleUpdateWithAttachments[] = [] ): Promise { + if (!(await this._getMultiPartSupport())) { + throw new Error( + "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." + ); + } const formData = new FormData(); for (const example of updates) { @@ -4022,6 +4030,11 @@ export class Client implements LangSmithTracingClientInterface { datasetId: string, uploads: ExampleUploadWithAttachments[] = [] ): Promise { + if (!(await this._getMultiPartSupport())) { + throw new Error( + "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." + ); + } const formData = new FormData(); for (const example of uploads) { From 5746d289ab7c78384133013ff21efee89eef23c4 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 17:39:08 -0800 Subject: [PATCH 18/23] fmt --- js/src/client.ts | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/js/src/client.ts b/js/src/client.ts index 5725d9acd..8126eac97 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -769,7 +769,9 @@ export class Client implements LangSmithTracingClientInterface { private async _getMultiPartSupport(): Promise { const serverInfo = await this._ensureServerInfo(); - return serverInfo.instance_flags?.dataset_examples_multipart_enabled ?? false; + return ( + serverInfo.instance_flags?.dataset_examples_multipart_enabled ?? false + ); } private drainAutoBatchQueue(batchSizeLimit: number) { @@ -2731,7 +2733,8 @@ export class Client implements LangSmithTracingClientInterface { const { attachment_urls, ...rest } = rawExample; const example: Example = rest; if (attachment_urls) { - const attachmentsArray = Object.entries(attachment_urls).map(([key, value]) => { + const attachmentsArray = Object.entries(attachment_urls).map( + ([key, value]) => { async function* fetchReader() { const response = await fetch(value.presigned_url); yield* IterableReadableStream.fromReadableStream(response.body!); @@ -2743,7 +2746,8 @@ export class Client implements LangSmithTracingClientInterface { reader: IterableReadableStream.fromAsyncGenerator(fetchReader()), }, }; - }); + } + ); // add attachments back to the example example.attachments = attachmentsArray.reduce((acc, { key, value }) => { if (value.reader != null) { @@ -2839,7 +2843,8 @@ export class Client implements LangSmithTracingClientInterface { const { attachment_urls, ...rest } = rawExample; const example: Example = rest; if (attachment_urls) { - const attachmentsArray = Object.entries(attachment_urls).map(([key, value]) => { + const attachmentsArray = Object.entries(attachment_urls).map( + ([key, value]) => { async function* fetchReader() { const response = await fetch(value.presigned_url); yield* IterableReadableStream.fromReadableStream( @@ -2855,7 +2860,8 @@ export class Client implements LangSmithTracingClientInterface { ), }, }; - }); + } + ); // add attachments back to the example example.attachments = attachmentsArray.reduce( (acc, { key, value }) => { From e241dee60cb707dfa2f154aeb1ba021986f2411f Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Tue, 10 Dec 2024 18:14:47 -0800 Subject: [PATCH 19/23] Remove reader --- js/src/client.ts | 71 ++++--------------- js/src/schemas.ts | 3 - js/src/tests/client.int.test.ts | 63 ++++++++++------- js/src/utils/stream.ts | 118 -------------------------------- 4 files changed, 51 insertions(+), 204 deletions(-) delete mode 100644 js/src/utils/stream.ts diff --git a/js/src/client.ts b/js/src/client.ts index 8126eac97..f9e47d44d 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -66,7 +66,6 @@ import { raiseForStatus } from "./utils/error.js"; import { _getFetchImplementation } from "./singletons/fetch.js"; import { stringify as stringifyForTracing } from "./utils/fast-safe-stringify/index.js"; -import { IterableReadableStream } from "./utils/stream.js"; export interface ClientConfig { apiUrl?: string; @@ -2733,35 +2732,16 @@ export class Client implements LangSmithTracingClientInterface { const { attachment_urls, ...rest } = rawExample; const example: Example = rest; if (attachment_urls) { - const attachmentsArray = Object.entries(attachment_urls).map( - ([key, value]) => { - async function* fetchReader() { - const response = await fetch(value.presigned_url); - yield* IterableReadableStream.fromReadableStream(response.body!); - } - return { - key, - value: { - presigned_url: value.presigned_url, - reader: IterableReadableStream.fromAsyncGenerator(fetchReader()), - }, - }; - } - ); // add attachments back to the example - example.attachments = attachmentsArray.reduce((acc, { key, value }) => { - if (value.reader != null) { - acc[ - key.startsWith("attachment.") - ? key.slice("attachment.".length) - : key - ] = { - ...value, - reader: value.reader, + example.attachments = Object.entries(attachment_urls).reduce( + (acc, [key, value]) => { + acc[key] = { + presigned_url: value.presigned_url, }; - } - return acc; - }, {} as Record); + return acc; + }, + {} as Record + ); } return example; } @@ -2843,38 +2823,11 @@ export class Client implements LangSmithTracingClientInterface { const { attachment_urls, ...rest } = rawExample; const example: Example = rest; if (attachment_urls) { - const attachmentsArray = Object.entries(attachment_urls).map( - ([key, value]) => { - async function* fetchReader() { - const response = await fetch(value.presigned_url); - yield* IterableReadableStream.fromReadableStream( - response.body! - ); - } - return { - key, - value: { - presigned_url: value.presigned_url, - reader: IterableReadableStream.fromAsyncGenerator( - fetchReader() - ), - }, + example.attachments = Object.entries(attachment_urls).reduce( + (acc, [key, value]) => { + acc[key] = { + presigned_url: value.presigned_url, }; - } - ); - // add attachments back to the example - example.attachments = attachmentsArray.reduce( - (acc, { key, value }) => { - if (value.reader != null) { - acc[ - key.startsWith("attachment.") - ? key.slice("attachment.".length) - : key - ] = { - ...value, - reader: value.reader, - }; - } return acc; }, {} as Record diff --git a/js/src/schemas.ts b/js/src/schemas.ts index 11f5a2ac5..b4c138cd5 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -1,5 +1,3 @@ -import { IterableReadableStream } from "./utils/stream.js"; - export interface TracerSession { // The ID of the tenant, or organization tenant_id: string; @@ -67,7 +65,6 @@ export interface BaseExample { export interface AttachmentInfo { presigned_url: string; - reader: IterableReadableStream; } export type AttachmentData = Uint8Array | ArrayBuffer; diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index f145b304d..7308c90f6 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -29,7 +29,6 @@ import { ChatOpenAI } from "@langchain/openai"; import { RunnableSequence } from "@langchain/core/runnables"; import { load } from "langchain/load"; import { _getFetchImplementation } from "../singletons/fetch.js"; -import { IterableReadableStream } from "../utils/stream.js"; type CheckOutputsType = boolean | ((run: Run) => boolean); async function waitUntilRunFound( @@ -1337,14 +1336,6 @@ test("upload examples multipart", async () => { await client.deleteDataset({ datasetName }); }); -async function readFromStream(reader: IterableReadableStream) { - const chunks: Uint8Array[] = []; - for await (const chunk of reader) { - chunks.push(chunk); - } - return new Uint8Array(Buffer.concat(chunks)); -} - test("update examples multipart", async () => { const client = new Client(); const datasetName = `__test_update_examples_multipart${uuidv4().slice(0, 4)}`; @@ -1421,16 +1412,26 @@ test("update examples multipart", async () => { let updatedExample = await client.readExample(exampleId); expect(updatedExample.inputs.text).toEqual("hello world2"); expect(Object.keys(updatedExample.attachments ?? {}).sort()).toEqual( - ["bar", "test_file"].sort() + ["attachment.bar", "attachment.test_file"].sort() ); expect(updatedExample.metadata).toEqual({ bar: "foo" }); - let attachmentData: Uint8Array | undefined = - updatedExample.attachments?.test_file.reader && - (await readFromStream(updatedExample.attachments?.test_file.reader)); + let attachmentData: Uint8Array | undefined = updatedExample.attachments?.[ + "attachment.test_file" + ].presigned_url + ? new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.test_file"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); - attachmentData = - updatedExample.attachments?.bar.reader && - (await readFromStream(updatedExample.attachments?.bar.reader)); + attachmentData = updatedExample.attachments?.["attachment.bar"].presigned_url + ? new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.bar"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); const exampleUpdate4: ExampleUpdateWithAttachments = { @@ -1444,10 +1445,17 @@ test("update examples multipart", async () => { await client.updateExamplesMultipart(dataset.id, [exampleUpdate4]); updatedExample = await client.readExample(exampleId); expect(updatedExample.metadata).toEqual({ foo: "bar" }); - expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file2"]); - attachmentData = - updatedExample.attachments?.test_file2.reader && - (await readFromStream(updatedExample.attachments?.test_file2.reader)); + expect(Object.keys(updatedExample.attachments ?? {})).toEqual([ + "attachment.test_file2", + ]); + attachmentData = updatedExample.attachments?.["attachment.test_file2"] + .presigned_url + ? new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.test_file2"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); const exampleUpdate5: ExampleUpdateWithAttachments = { @@ -1465,10 +1473,17 @@ test("update examples multipart", async () => { foo: "bar", dataset_split: ["foo", "bar"], }); - expect(Object.keys(updatedExample.attachments ?? {})).toEqual(["test_file"]); - attachmentData = - updatedExample.attachments?.test_file.reader && - (await readFromStream(updatedExample.attachments?.test_file.reader)); + expect(Object.keys(updatedExample.attachments ?? {})).toEqual([ + "attachment.test_file", + ]); + attachmentData = updatedExample.attachments?.["attachment.test_file"] + .presigned_url + ? new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.test_file"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); // Clean up diff --git a/js/src/utils/stream.ts b/js/src/utils/stream.ts deleted file mode 100644 index 121ce48a3..000000000 --- a/js/src/utils/stream.ts +++ /dev/null @@ -1,118 +0,0 @@ -import type { IterableReadableStreamInterface } from "../types/stream.js"; - -// Re-exported for backwards compatibility -// Do NOT import this type from this file inside the project. Instead, always import from `types/stream.js` -export type { IterableReadableStreamInterface }; - -/* - * Support async iterator syntax for ReadableStreams in all environments. - * Source: https://github.com/MattiasBuelens/web-streams-polyfill/pull/122#issuecomment-1627354490 - */ -export class IterableReadableStream - extends ReadableStream - implements IterableReadableStreamInterface -{ - public reader: ReadableStreamDefaultReader; - - ensureReader() { - if (!this.reader) { - this.reader = this.getReader(); - } - } - - async next(): Promise> { - this.ensureReader(); - try { - const result = await this.reader.read(); - if (result.done) { - this.reader.releaseLock(); // release lock when stream becomes closed - return { - done: true, - value: undefined, - }; - } else { - return { - done: false, - value: result.value, - }; - } - } catch (e) { - this.reader.releaseLock(); // release lock when stream becomes errored - throw e; - } - } - - async return(): Promise> { - this.ensureReader(); - // If wrapped in a Node stream, cancel is already called. - if (this.locked) { - const cancelPromise = this.reader.cancel(); // cancel first, but don't await yet - this.reader.releaseLock(); // release lock first - await cancelPromise; // now await it - } - return { done: true, value: undefined }; - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - async throw(e: any): Promise> { - this.ensureReader(); - if (this.locked) { - const cancelPromise = this.reader.cancel(); // cancel first, but don't await yet - this.reader.releaseLock(); // release lock first - await cancelPromise; // now await it - } - throw e; - } - - [Symbol.asyncIterator]() { - return this; - } - - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore Not present in Node 18 types, required in latest Node 22 - async [Symbol.asyncDispose]() { - await this.return(); - } - - static fromReadableStream(stream: ReadableStream) { - // From https://developer.mozilla.org/en-US/docs/Web/API/Streams_API/Using_readable_streams#reading_the_stream - const reader = stream.getReader(); - return new IterableReadableStream({ - start(controller) { - return pump(); - function pump(): Promise { - return reader.read().then(({ done, value }) => { - // When no more data needs to be consumed, close the stream - if (done) { - controller.close(); - return; - } - // Enqueue the next data chunk into our target stream - controller.enqueue(value); - return pump(); - }); - } - }, - cancel() { - reader.releaseLock(); - }, - }); - } - - static fromAsyncGenerator(generator: AsyncGenerator) { - return new IterableReadableStream({ - async pull(controller) { - const { value, done } = await generator.next(); - // When no more data needs to be consumed, close the stream - if (done) { - controller.close(); - } - // Fix: `else if (value)` will hang the streaming when nullish value (e.g. empty string) is pulled - controller.enqueue(value); - }, - async cancel(reason) { - await generator.return(reason); - }, - }); - } -} From c0d9299337d75c319775c324a275eba13d2997f1 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Tue, 10 Dec 2024 18:17:06 -0800 Subject: [PATCH 20/23] Remove unnecessary type --- js/src/types/stream.ts | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 js/src/types/stream.ts diff --git a/js/src/types/stream.ts b/js/src/types/stream.ts deleted file mode 100644 index ae03b69b7..000000000 --- a/js/src/types/stream.ts +++ /dev/null @@ -1,5 +0,0 @@ -// Make this a type to override ReadableStream's async iterator type in case -// the popular web-streams-polyfill is imported - the supplied types -// in that case don't quite match. -export type IterableReadableStreamInterface = ReadableStream & - AsyncIterable; From bd64dbff591e558ef99790e4c85037e653c38ad7 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 19:05:50 -0800 Subject: [PATCH 21/23] test --- js/src/tests/client.int.test.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 7308c90f6..3cd50d929 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1298,13 +1298,11 @@ test("upload examples multipart", async () => { expect(createdExamples.count).toBe(2); - const createdExample1 = await client.readExample( - createdExamples.example_ids[0] - ); + const createdExample1 = await client.readExample(exampleId); expect(createdExample1.inputs["text"]).toBe("hello world"); const createdExample2 = await client.readExample( - createdExamples.example_ids[1] + createdExamples.example_ids.find((id) => id !== exampleId)! ); expect(createdExample2.inputs["text"]).toBe("foo bar"); expect(createdExample2.outputs?.["response"]).toBe("baz"); From b3b0431b01334e1fead3d31758fd25774c5804a4 Mon Sep 17 00:00:00 2001 From: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:42:08 -0800 Subject: [PATCH 22/23] update evaluate to use examples (#1225) --- js/src/client.ts | 7 + js/src/evaluation/_runner.ts | 77 ++- js/src/evaluation/evaluator.ts | 1 + js/src/schemas.ts | 4 +- js/src/tests/client.int.test.ts | 2 +- js/src/tests/evaluate_attachments.int.test.ts | 469 ++++++++++++++++++ 6 files changed, 543 insertions(+), 17 deletions(-) create mode 100644 js/src/tests/evaluate_attachments.int.test.ts diff --git a/js/src/client.ts b/js/src/client.ts index f9e47d44d..23f33d515 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -2757,6 +2757,7 @@ export class Client implements LangSmithTracingClientInterface { limit, offset, filter, + includeAttachments, }: { datasetId?: string; datasetName?: string; @@ -2768,6 +2769,7 @@ export class Client implements LangSmithTracingClientInterface { limit?: number; offset?: number; filter?: string; + includeAttachments?: boolean; } = {}): AsyncIterable { let datasetId_; if (datasetId !== undefined && datasetName !== undefined) { @@ -2814,6 +2816,11 @@ export class Client implements LangSmithTracingClientInterface { if (filter !== undefined) { params.append("filter", filter); } + if (includeAttachments === true) { + ["attachment_urls", "outputs", "metadata"].forEach((field) => + params.append("select", field) + ); + } let i = 0; for await (const rawExamples of this._getPaginated( "/examples", diff --git a/js/src/evaluation/_runner.ts b/js/src/evaluation/_runner.ts index cac6b5f4c..6c74afa34 100644 --- a/js/src/evaluation/_runner.ts +++ b/js/src/evaluation/_runner.ts @@ -1,5 +1,12 @@ import { Client, RunTree, RunTreeConfig } from "../index.js"; -import { BaseRun, Example, KVMap, Run, TracerSession } from "../schemas.js"; +import { + AttachmentInfo, + BaseRun, + Example, + KVMap, + Run, + TracerSession, +} from "../schemas.js"; import { traceable } from "../traceable.js"; import { getDefaultRevisionId, getGitInfo } from "../utils/_git.js"; import { assertUuid } from "../utils/_uuid.js"; @@ -22,11 +29,15 @@ import { ComparativeEvaluator, } from "./evaluate_comparative.js"; +export type TargetConfigT = KVMap & { + attachments?: Record; + callbacks?: any; +}; type StandardTargetT = - | ((input: TInput, config?: KVMap) => Promise) - | ((input: TInput, config?: KVMap) => TOutput) - | { invoke: (input: TInput, config?: KVMap) => TOutput } - | { invoke: (input: TInput, config?: KVMap) => Promise }; + | ((input: TInput, config?: TargetConfigT) => Promise) + | ((input: TInput, config?: TargetConfigT) => TOutput) + | { invoke: (input: TInput, config?: TargetConfigT) => TOutput } + | { invoke: (input: TInput, config?: TargetConfigT) => Promise }; type ComparativeTargetT = | Array @@ -98,6 +109,7 @@ export type EvaluatorT = inputs: Record; outputs: Record; referenceOutputs?: Record; + attachments?: Record; }) => EvaluationResult | EvaluationResults) | ((args: { run: Run; @@ -105,6 +117,7 @@ export type EvaluatorT = inputs: Record; outputs: Record; referenceOutputs?: Record; + attachments?: Record; }) => Promise); interface _ForwardResults { @@ -127,6 +140,7 @@ interface _ExperimentManagerArgs { examples?: Example[]; numRepetitions?: number; _runsArray?: Run[]; + includeAttachments?: boolean; } type BaseEvaluateOptions = { @@ -178,6 +192,11 @@ export interface EvaluateOptions extends BaseEvaluateOptions { * examples, or a generator of examples. */ data: DataT; + /** + * Whether to use attachments for the experiment. + * @default false + */ + includeAttachments?: boolean; } export interface ComparativeEvaluateOptions extends BaseEvaluateOptions { @@ -256,6 +275,8 @@ export class _ExperimentManager { _metadata: KVMap; _description?: string; + _includeAttachments?: boolean; + get experimentName(): string { if (this._experimentName) { return this._experimentName; @@ -271,7 +292,10 @@ export class _ExperimentManager { if (!this._data) { throw new Error("Data not provided in this experiment."); } - const unresolvedData = _resolveData(this._data, { client: this.client }); + const unresolvedData = _resolveData(this._data, { + client: this.client, + includeAttachments: this._includeAttachments, + }); if (!this._examples) { this._examples = []; } @@ -369,6 +393,7 @@ export class _ExperimentManager { this._evaluationResults = args.evaluationResults; this._summaryResults = args.summaryResults; this._numRepetitions = args.numRepetitions; + this._includeAttachments = args.includeAttachments; } _getExperiment(): TracerSession { @@ -465,6 +490,7 @@ export class _ExperimentManager { client: this.client, evaluationResults: this._evaluationResults, summaryResults: this._summaryResults, + includeAttachments: this._includeAttachments, }); } @@ -485,6 +511,7 @@ export class _ExperimentManager { yield pred.run; } })(), + includeAttachments: this._includeAttachments, }); } @@ -515,6 +542,7 @@ export class _ExperimentManager { } })(), summaryResults: this._summaryResults, + includeAttachments: this._includeAttachments, }); } @@ -532,6 +560,7 @@ export class _ExperimentManager { _runsArray: this._runsArray, evaluationResults: this._evaluationResults, summaryResults: aggregateFeedbackGen, + includeAttachments: this._includeAttachments, }); } @@ -603,7 +632,8 @@ export class _ExperimentManager { example, this.experimentName, this._metadata, - this.client + this.client, + this._includeAttachments ); } } else { @@ -621,7 +651,8 @@ export class _ExperimentManager { example, this.experimentName, this._metadata, - this.client + this.client, + this._includeAttachments ) ); } @@ -794,9 +825,9 @@ export class _ExperimentManager { return strMiliseconds ?? ""; } - const jsDate = new Date(date); + const jsDate = new Date(date!); - let source = getMiliseconds(date); + let source = getMiliseconds(date!); let parsed = getMiliseconds(jsDate.toISOString()); const length = Math.max(source.length, parsed.length); @@ -943,6 +974,7 @@ async function _evaluate( experiment: experiment_ ?? fields.experimentPrefix, runs: newRuns ?? undefined, numRepetitions: fields.numRepetitions ?? 1, + includeAttachments: standardFields.includeAttachments, }).start(); if (_isCallable(target)) { @@ -972,7 +1004,8 @@ async function _forward( example: Example, experimentName: string, metadata: KVMap, - client: Client + client: Client, + includeAttachments?: boolean ): Promise<_ForwardResults> { let run: BaseRun | null = null; @@ -1006,16 +1039,29 @@ async function _forward( // no-op } // Issue with retrieving LangChain callbacks, rely on interop - if (langChainCallbacks === undefined) { + if (langChainCallbacks === undefined && !includeAttachments) { return await fn.invoke(inputs); - } else { + } else if (langChainCallbacks === undefined && includeAttachments) { + return await fn.invoke(inputs, { + attachments: example.attachments, + }); + } else if (!includeAttachments) { return await fn.invoke(inputs, { callbacks: langChainCallbacks }); + } else { + return await fn.invoke(inputs, { + attachments: example.attachments, + callbacks: langChainCallbacks, + }); } }, options) : traceable(fn, options); try { - await wrappedFn(example.inputs); + if (includeAttachments && !("invoke" in fn)) { + await wrappedFn(example.inputs, { attachments: example.attachments }); + } else { + await wrappedFn(example.inputs); + } } catch (e) { console.error(`Error running target function: ${e}`); printErrorStackTrace(e); @@ -1037,6 +1083,7 @@ function _resolveData( data: DataT, options: { client: Client; + includeAttachments?: boolean; } ): AsyncGenerator { let isUUID = false; @@ -1052,11 +1099,13 @@ function _resolveData( if (typeof data === "string" && isUUID) { return options.client.listExamples({ datasetId: data, + includeAttachments: options.includeAttachments, }) as AsyncGenerator; } if (typeof data === "string") { return options.client.listExamples({ datasetName: data, + includeAttachments: options.includeAttachments, }) as AsyncGenerator; } return data as AsyncGenerator; diff --git a/js/src/evaluation/evaluator.ts b/js/src/evaluation/evaluator.ts index 4e64460d3..cd7ce1fdd 100644 --- a/js/src/evaluation/evaluator.ts +++ b/js/src/evaluation/evaluator.ts @@ -138,6 +138,7 @@ export class DynamicRunEvaluator any> inputs: example?.inputs, outputs: run?.outputs, referenceOutputs: example?.outputs, + attachments: example?.attachments, }, example ); diff --git a/js/src/schemas.ts b/js/src/schemas.ts index b4c138cd5..af7848e59 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -254,12 +254,12 @@ export interface ExampleCreate extends BaseExample { export interface ExampleUploadWithAttachments { id?: string; - created_at?: string; inputs: KVMap; outputs?: KVMap; metadata?: KVMap; split?: string | string[]; attachments?: Attachments; + created_at?: string; } export interface ExampleUpdateWithAttachments { @@ -282,7 +282,7 @@ export interface UpdateExamplesResponse extends UploadExamplesResponse {} export interface Example extends BaseExample { id: string; created_at: string; - modified_at: string; + modified_at?: string; source_run_id?: string; runs: Run[]; attachments?: Record; diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 3cd50d929..18582a01b 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -593,7 +593,7 @@ test.concurrent( expect(examplesList2.length).toEqual(3); const datasetDiff = await client.diffDatasetVersions({ datasetId: dataset.id, - fromVersion: initialVersion, + fromVersion: initialVersion!, toVersion: "latest", }); expect(datasetDiff.examples_added.length).toEqual(3); diff --git a/js/src/tests/evaluate_attachments.int.test.ts b/js/src/tests/evaluate_attachments.int.test.ts new file mode 100644 index 000000000..20112d57f --- /dev/null +++ b/js/src/tests/evaluate_attachments.int.test.ts @@ -0,0 +1,469 @@ +import { evaluate, TargetConfigT } from "../evaluation/_runner.js"; +import { ExampleUploadWithAttachments } from "../schemas.js"; +import { Client } from "../index.js"; +import { v4 as uuidv4 } from "uuid"; +import { RunnableLambda } from "@langchain/core/runnables"; + +function arraysEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +test("evaluate can handle examples with attachments", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" }, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + // Define target function that processes attachments + const targetFunc = async ( + _inputs: Record, + config?: TargetConfigT + ) => { + // Verify we receive the attachment data + if (!config?.attachments?.["attachment.image"]) { + throw new Error("Image attachment not found"); + } + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = config?.attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch( + config?.attachments?.["attachment.image"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { answer: "test image" }; + }; + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY + // THIS SHOULD BE FIXED ASAP + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(1); // Should have attachment_presence and attachment_processing results + + // Check that attachments were properly processed + const evalResult = evalResults.find((r) => r.key === "key"); + expect(evalResult?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("evaluate with attachments not in target function", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" }, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + // Define target function that processes attachments + const targetFunc = async (_inputs: Record) => { + return { answer: "test image" }; + }; + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY + // THIS SHOULD BE FIXED ASAP + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(1); // Should have attachment_presence and attachment_processing results + + // Check that attachments were properly processed + const evalResult = evalResults.find((r) => r.key === "key"); + expect(evalResult?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("multiple evaluators with attachments", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" }, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + // Define target function that processes attachments + const targetFunc = async ( + _inputs: Record, + config?: TargetConfigT + ) => { + // Verify we receive the attachment data + if (!config?.attachments?.["attachment.image"]) { + throw new Error("Image attachment not found"); + } + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = config?.attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch( + config?.attachments?.["attachment.image"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { answer: "test image" }; + }; + + const customEvaluatorOne = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key1", + score: 1, + }; + }; + + const customEvaluatorTwo = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key2", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluatorOne, customEvaluatorTwo], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY + // THIS SHOULD BE FIXED ASAP + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(2); // Should have attachment_presence and attachment_processing results + + // Check that attachments were properly processed + const evalResult1 = evalResults.find((r) => r.key === "key1"); + expect(evalResult1?.score).toBe(1); + const evalResult2 = evalResults.find((r) => r.key === "key2"); + expect(evalResult2?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("evaluate with attachments runnable target function", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" }, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + const myFunction = async (_input: any, config?: any) => { + if (!config?.attachments?.["attachment.image"]) { + throw new Error("Image attachment not found"); + } + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = config?.attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch( + config?.attachments?.["attachment.image"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { answer: "test image" }; + }; + + // Define target function that processes attachments + const targetFunc = RunnableLambda.from(myFunction); + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY + // THIS SHOULD BE FIXED ASAP + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(1); // Should have attachment_presence and attachment_processing results + + // Check that attachments were properly processed + const evalResult = evalResults.find((r) => r.key === "key"); + expect(evalResult?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("attachments don't appear without includeAttachments", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" }, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + const myFunction = async (_input: any, config?: any) => { + if (config?.attachments) { + throw new Error("Attachments should not exist!"); + } + return { answer: "test image" }; + }; + + // Define target function that processes attachments + const targetFunc = RunnableLambda.from(myFunction); + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeUndefined(); + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + // Cleanup + await client.deleteDataset({ datasetName }); +}); From 697bf39d210fa87be4b84f6f3883023bde0a8d2a Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Tue, 10 Dec 2024 20:36:15 -0800 Subject: [PATCH 23/23] Tone down TODO --- js/src/tests/evaluate_attachments.int.test.ts | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/js/src/tests/evaluate_attachments.int.test.ts b/js/src/tests/evaluate_attachments.int.test.ts index 20112d57f..9a78a3f9a 100644 --- a/js/src/tests/evaluate_attachments.int.test.ts +++ b/js/src/tests/evaluate_attachments.int.test.ts @@ -93,8 +93,7 @@ test("evaluate can handle examples with attachments", async () => { expect(evalRes.results).toHaveLength(2); for (const result of evalRes.results) { - // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY - // THIS SHOULD BE FIXED ASAP + // TODO: Uncomment when attachments are traced correctly // expect(result.run.attachments).toBeDefined(); expect(result.run).toBeDefined(); expect(result.example).toBeDefined(); @@ -172,8 +171,7 @@ test("evaluate with attachments not in target function", async () => { expect(evalRes.results).toHaveLength(2); for (const result of evalRes.results) { - // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY - // THIS SHOULD BE FIXED ASAP + // TODO: Uncomment when attachments are traced correctly // expect(result.run.attachments).toBeDefined(); expect(result.run).toBeDefined(); expect(result.example).toBeDefined(); @@ -297,8 +295,7 @@ test("multiple evaluators with attachments", async () => { expect(evalRes.results).toHaveLength(2); for (const result of evalRes.results) { - // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY - // THIS SHOULD BE FIXED ASAP + // TODO: Uncomment when attachments are traced correctly // expect(result.run.attachments).toBeDefined(); expect(result.run).toBeDefined(); expect(result.example).toBeDefined(); @@ -398,8 +395,7 @@ test("evaluate with attachments runnable target function", async () => { expect(evalRes.results).toHaveLength(2); for (const result of evalRes.results) { - // NOTE: THIS DOES NOT WORK YET BECAUSE THE ATTACHMENTS ARE NOT TRACED CORRECTLY - // THIS SHOULD BE FIXED ASAP + // TODO: Uncomment when attachments are traced correctly // expect(result.run.attachments).toBeDefined(); expect(result.run).toBeDefined(); expect(result.example).toBeDefined();