Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(js): upload and update examples multipart in JS #1216

Merged
merged 26 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 218 additions & 3 deletions js/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
AnnotationQueue,
RunWithAnnotationQueueInfo,
Attachments,
ExampleUploadWithAttachments,
UploadExamplesResponse,
ExampleUpdateWithAttachments,
UpdateExamplesResponse,
RawExample,
AttachmentInfo,
} from "./schemas.js";
import {
convertLangChainMessageToExample,
Expand Down Expand Up @@ -417,7 +423,7 @@
// If there is an item on the queue we were unable to pop,
// just return it as a single batch.
if (popped.length === 0 && this.items.length > 0) {
const item = this.items.shift()!;

Check warning on line 426 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Forbidden non-null assertion
popped.push(item);
poppedSizeBytes += item.size;
this.sizeBytes -= item.size;
Expand Down Expand Up @@ -760,6 +766,13 @@
);
}

/**
 * Reports whether the server info exposes the multipart dataset-examples flag.
 *
 * @returns The `dataset_examples_multipart_enabled` instance flag, or `false`
 * when `instance_flags` or the flag itself is null/undefined.
 */
private async _getMultiPartSupport(): Promise<boolean> {
  const info = await this._ensureServerInfo();
  const multipartFlag =
    info.instance_flags?.dataset_examples_multipart_enabled;
  return multipartFlag ?? false;
}

private drainAutoBatchQueue(batchSizeLimit: number) {
while (this.autoBatchQueue.items.length > 0) {
const [batch, done] = this.autoBatchQueue.pop(batchSizeLimit);
Expand Down Expand Up @@ -833,7 +846,7 @@
if (this._serverInfo === undefined) {
try {
this._serverInfo = await this._getServerInfo();
} catch (e) {

Check warning on line 849 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'e' is defined but never used. Allowed unused args must match /^_/u
console.warn(
`[WARNING]: LangSmith failed to fetch info on supported operations. Falling back to batch operations and default limits.`
);
Expand Down Expand Up @@ -1551,7 +1564,7 @@
treeFilter?: string;
isRoot?: boolean;
dataSourceType?: string;
}): Promise<any> {

Check warning on line 1567 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
let projectIds_ = projectIds || [];
if (projectNames) {
projectIds_ = [
Expand Down Expand Up @@ -1839,7 +1852,7 @@
`Failed to list shared examples: ${response.status} ${response.statusText}`
);
}
return result.map((example: any) => ({

Check warning on line 1855 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
...example,
_hostUrl: this.getHostUrl(),
}));
Expand Down Expand Up @@ -1976,7 +1989,7 @@
}
// projectId querying
return true;
} catch (e) {

Check warning on line 1992 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'e' is defined but never used. Allowed unused args must match /^_/u
return false;
}
}
Expand Down Expand Up @@ -2715,7 +2728,22 @@
/**
 * Fetch a single example by id.
 *
 * The raw payload may carry an `attachment_urls` map. When it does, the map is
 * re-exposed on the returned example as `attachments`, keeping only each
 * entry's `presigned_url`.
 *
 * @param exampleId Id of the example; passed through `assertUuid` before the request.
 * @returns The example, with `attachments` populated when the payload included `attachment_urls`.
 */
public async readExample(exampleId: string): Promise<Example> {
  assertUuid(exampleId);
  const path = `/examples/${exampleId}`;
  // Read the raw payload instead of returning the response directly, so the
  // attachment URLs below are actually mapped onto the result.
  const rawExample: RawExample = await this._get(path);
  const { attachment_urls, ...rest } = rawExample;
  const example: Example = rest;
  if (attachment_urls) {
    // Add attachments back to the example under the public `attachments` key.
    const attachments: Record<string, AttachmentInfo> = {};
    for (const [name, info] of Object.entries(attachment_urls)) {
      attachments[name] = { presigned_url: info.presigned_url };
    }
    example.attachments = attachments;
  }
  return example;
}

public async *listExamples({
Expand All @@ -2729,6 +2757,7 @@
limit,
offset,
filter,
includeAttachments,
}: {
datasetId?: string;
datasetName?: string;
Expand All @@ -2740,6 +2769,7 @@
limit?: number;
offset?: number;
filter?: string;
includeAttachments?: boolean;
} = {}): AsyncIterable<Example> {
let datasetId_;
if (datasetId !== undefined && datasetName !== undefined) {
Expand Down Expand Up @@ -2786,12 +2816,30 @@
if (filter !== undefined) {
params.append("filter", filter);
}
if (includeAttachments === true) {
["attachment_urls", "outputs", "metadata"].forEach((field) =>
params.append("select", field)
);
}
let i = 0;
for await (const examples of this._getPaginated<Example>(
for await (const rawExamples of this._getPaginated<RawExample>(
"/examples",
params
)) {
for (const example of examples) {
for (const rawExample of rawExamples) {
const { attachment_urls, ...rest } = rawExample;
const example: Example = rest;
if (attachment_urls) {
example.attachments = Object.entries(attachment_urls).reduce(
(acc, [key, value]) => {
acc[key] = {
presigned_url: value.presigned_url,
};
return acc;
},
{} as Record<string, AttachmentInfo>
);
}
yield example;
i++;
}
Expand Down Expand Up @@ -3316,7 +3364,7 @@
async _logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }

Check warning on line 3367 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
): Promise<[results: EvaluationResult[], feedbacks: Feedback[]]> {
const evalResults: Array<EvaluationResult> =
this._selectEvalResults(evaluatorResponse);
Expand Down Expand Up @@ -3355,7 +3403,7 @@
public async logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }

Check warning on line 3406 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
): Promise<EvaluationResult[]> {
const [results] = await this._logEvaluationFeedback(
evaluatorResponse,
Expand Down Expand Up @@ -3805,7 +3853,7 @@

public async createCommit(
promptIdentifier: string,
object: any,

Check warning on line 3856 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
options?: {
parentCommitHash?: string;
}
Expand Down Expand Up @@ -3847,6 +3895,173 @@
);
}

/**
* Update examples, including their attachments, by sending a single
* multipart form-data PATCH request to the dataset's examples endpoint.
*
* Each update contributes form parts keyed by its example id:
* `<id>` (JSON holding `metadata` / `split` when set), and, only when the
* corresponding field is truthy, `<id>.inputs`, `<id>.outputs`,
* `<id>.attachment.<name>` and `<id>.attachments_operations`.
*
* NOTE(review): the HTTP status of the response is not inspected in this
* method; the parsed JSON body is returned as-is. Confirm that error
* responses are surfaced elsewhere.
*
* @param datasetId Id of the dataset; interpolated into the request URL.
* @param updates List of ExampleUpdateWithAttachments objects to apply (defaults to an empty list)
* @returns Promise resolving to the parsed JSON body of the response
* @throws Error when `_getMultiPartSupport()` resolves to false
*/
public async updateExamplesMultipart(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we really naming a public API this?

Why not just updateExamples?

datasetId: string,
updates: ExampleUpdateWithAttachments[] = []
): Promise<UpdateExamplesResponse> {
// Refuse early when multipart support is not reported as enabled.
if (!(await this._getMultiPartSupport())) {
throw new Error(
"Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
);
}
// All examples are accumulated into one form and sent in one request.
const formData = new FormData();

for (const example of updates) {
// The example id is the prefix of every part name for this example.
const exampleId = example.id;

// Prepare the main example body; metadata and split are included only when truthy
const exampleBody = {
...(example.metadata && { metadata: example.metadata }),
...(example.split && { split: example.split }),
};

// Add main example data as a JSON part named after the example id
const stringifiedExample = stringifyForTracing(exampleBody);
const exampleBlob = new Blob([stringifiedExample], {
type: "application/json",
});
formData.append(exampleId, exampleBlob);

// Add inputs if present
if (example.inputs) {
const stringifiedInputs = stringifyForTracing(example.inputs);
const inputsBlob = new Blob([stringifiedInputs], {
type: "application/json",
});
formData.append(`${exampleId}.inputs`, inputsBlob);
}

// Add outputs if present
if (example.outputs) {
const stringifiedOutputs = stringifyForTracing(example.outputs);
const outputsBlob = new Blob([stringifiedOutputs], {
type: "application/json",
});
formData.append(`${exampleId}.outputs`, outputsBlob);
}

// Add attachments if present; each entry is a [mimeType, data] pair
if (example.attachments) {
for (const [name, [mimeType, data]] of Object.entries(
example.attachments
)) {
// The part's content type carries the mime type plus the payload byte length.
const attachmentBlob = new Blob([data], {
type: `${mimeType}; length=${data.byteLength}`,
});
formData.append(`${exampleId}.attachment.${name}`, attachmentBlob);
}
}

// Add attachment operations if present, serialized as a JSON part
if (example.attachments_operations) {
isahers1 marked this conversation as resolved.
Show resolved Hide resolved
const stringifiedAttachmentsOperations = stringifyForTracing(
example.attachments_operations
);
const attachmentsOperationsBlob = new Blob(
[stringifiedAttachmentsOperations],
{
type: "application/json",
}
);
formData.append(
`${exampleId}.attachments_operations`,
attachmentsOperationsBlob
);
}
}

// Send the assembled form through the client's caller using PATCH.
const response = await this.caller.call(
_getFetchImplementation(),
`${this.apiUrl}/v1/platform/datasets/${datasetId}/examples`,
{
method: "PATCH",
headers: this.headers,
body: formData,
}
);
// The JSON body is returned without checking the response status.
const result = await response.json();
return result;
}

/**
* Upload examples, including their attachments, by sending a single
* multipart form-data POST request to the dataset's examples endpoint.
*
* Each upload contributes form parts keyed by its example id (a v4 UUID is
* generated when the upload has no `id`): `<id>` (JSON holding `created_at`
* plus `metadata` / `split` when set), `<id>.inputs` (always appended), and,
* only when the corresponding field is truthy, `<id>.outputs` and
* `<id>.attachment.<name>`.
*
* NOTE(review): the HTTP status of the response is not inspected in this
* method; the parsed JSON body is returned as-is. Confirm that error
* responses are surfaced elsewhere.
*
* @param datasetId Id of the dataset; interpolated into the request URL.
* @param uploads List of ExampleUploadWithAttachments objects to upload (defaults to an empty list)
* @returns Promise resolving to the parsed JSON body of the response
* @throws Error when `_getMultiPartSupport()` resolves to false
*/
public async uploadExamplesMultipart(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above

datasetId: string,
uploads: ExampleUploadWithAttachments[] = []
): Promise<UploadExamplesResponse> {
// Refuse early when multipart support is not reported as enabled.
if (!(await this._getMultiPartSupport())) {
throw new Error(
"Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
);
}
// All examples are accumulated into one form and sent in one request.
const formData = new FormData();

for (const example of uploads) {
// Use the supplied id, or generate a v4 UUID; it prefixes every part name.
const exampleId = (example.id ?? uuid.v4()).toString();

// Prepare the main example body; created_at is always set as a key,
// metadata and split only when truthy
const exampleBody = {
created_at: example.created_at,
...(example.metadata && { metadata: example.metadata }),
...(example.split && { split: example.split }),
};

// Add main example data as a JSON part named after the example id
const stringifiedExample = stringifyForTracing(exampleBody);
const exampleBlob = new Blob([stringifiedExample], {
type: "application/json",
});
formData.append(exampleId, exampleBlob);

// Add inputs (appended unconditionally, unlike outputs and attachments)
const stringifiedInputs = stringifyForTracing(example.inputs);
const inputsBlob = new Blob([stringifiedInputs], {
type: "application/json",
});
formData.append(`${exampleId}.inputs`, inputsBlob);

// Add outputs if present
if (example.outputs) {
const stringifiedOutputs = stringifyForTracing(example.outputs);
const outputsBlob = new Blob([stringifiedOutputs], {
type: "application/json",
});
formData.append(`${exampleId}.outputs`, outputsBlob);
}

// Add attachments if present; each entry is a [mimeType, data] pair
if (example.attachments) {
for (const [name, [mimeType, data]] of Object.entries(
example.attachments
)) {
// The part's content type carries the mime type plus the payload byte length.
const attachmentBlob = new Blob([data], {
type: `${mimeType}; length=${data.byteLength}`,
});
formData.append(`${exampleId}.attachment.${name}`, attachmentBlob);
}
}
}

// Send the assembled form through the client's caller using POST.
const response = await this.caller.call(
_getFetchImplementation(),
`${this.apiUrl}/v1/platform/datasets/${datasetId}/examples`,
{
method: "POST",
headers: this.headers,
body: formData,
}
);
// The JSON body is returned without checking the response status.
const result = await response.json();
return result;
}

public async updatePrompt(
promptIdentifier: string,
options?: {
Expand All @@ -3856,7 +4071,7 @@
isPublic?: boolean;
isArchived?: boolean;
}
): Promise<Record<string, any>> {

Check warning on line 4074 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
if (!(await this.promptExists(promptIdentifier))) {
throw new Error("Prompt does not exist, you must create it first.");
}
Expand All @@ -3867,7 +4082,7 @@
throw await this._ownerConflictError("update a prompt", owner);
}

const payload: Record<string, any> = {};

Check warning on line 4085 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type

if (options?.description !== undefined)
payload.description = options.description;
Expand Down
Loading
Loading