From 75c55e9e5bc0eb916ad5c3f086663d976e2ee03a Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Wed, 11 Dec 2024 20:33:26 -0800 Subject: [PATCH] checkpoint --- .../evaluate_with_attachments.mdx | 375 +++++++++--------- docs/evaluation/how_to_guides/index.md | 2 +- 2 files changed, 196 insertions(+), 181 deletions(-) diff --git a/docs/evaluation/how_to_guides/evaluate_with_attachments.mdx b/docs/evaluation/how_to_guides/evaluate_with_attachments.mdx index e7c87394..a073fbb5 100644 --- a/docs/evaluation/how_to_guides/evaluate_with_attachments.mdx +++ b/docs/evaluation/how_to_guides/evaluate_with_attachments.mdx @@ -3,7 +3,7 @@ import { CodeTabs, PythonBlock, TypeScriptBlock, -} from "@site/src/components/InstructionsWithCode" +} from "@site/src/components/InstructionsWithCode"; # Evaluate applications with large file inputs @@ -27,15 +27,12 @@ Finally, attachments are more user-friendly in the LangSmith UI, as they are ren To upload examples with attachments using the SDK, you need to use the `upload_examples_multipart` method of the LangSmith client. This method allows you to pass in a list of examples with attachments. -Each attachment is represented as a dictionary, mapping the attachment name to a tuple containing the MIME type and the file content. -The file content can be either a path to the file or the bytes content of the file. 
- :::note Minimum SDK Versions The following features are available in the following SDK versions: - Python SDK: >=0.2.3 - JS/TS SDK: >=0.2.13 -::: + ::: { - const response = await fetch(url); - if (!response.ok) { - throw new Error(\`Failed to fetch \${url}\: $\{response.statusText\}\`); - } - return response.arrayBuffer(); +const response = await fetch(url); +if (!response.ok) { +throw new Error(\`Failed to fetch \${url}\: $\{response.statusText\}\`); +} +return response.arrayBuffer(); } // Fetch files as ArrayBuffer @@ -119,42 +133,42 @@ const datasetName = "attachment-test-dataset:" + uuid4().substring(0, 8); // Create the dataset const dataset = await langsmithClient.createDataset(datasetName, { - description: "Test dataset for evals with publicly available attachments", +description: "Test dataset for evals with publicly available attachments", }); // Define the example with attachments const exampleId = uuid4(); const example = { - id: exampleId, - inputs: { - audio_question: "What is in this audio clip?", - image_question: "What is in this image?", - }, - outputs: { - audio_answer: - "The sun rises in the east and sets in the west. This simple fact has been observed by humans for thousands of years.", - image_answer: "A mug with a blanket over it.", - }, - attachments: { - my_pdf: { - mimeType: "application/pdf", - data: pdfArrayBuffer - }, - my_wav: { - mimeType: "audio/wav", - data: wavArrayBuffer - }, - my_img: { - mimeType: "image/png", - data: pngArrayBuffer - }, - }, +id: exampleId, +inputs: { +audio_question: "What is in this audio clip?", +image_question: "What is in this image?", +}, +outputs: { +audio_answer: +"The sun rises in the east and sets in the west. 
This simple fact has been observed by humans for thousands of years.", +image_answer: "A mug with a blanket over it.", +}, +attachments: { +my_pdf: { +mimeType: "application/pdf", +data: pdfArrayBuffer +}, +my_wav: { +mimeType: "audio/wav", +data: wavArrayBuffer +}, +my_img: { +mimeType: "image/png", +data: pngArrayBuffer +}, +}, }; // Upload the example with attachments to the dataset await langsmithClient.uploadExamplesMultipart(dataset.id, [example]);`), - ]} - groupId="client-language" +]} +groupId="client-language" /> Once you upload examples with attachments, you can view them in the LangSmith UI. Each attachment will be rendered as a file with a preview, making it easy to inspect the contents. @@ -163,7 +177,7 @@ Once you upload examples with attachments, you can view them in the LangSmith UI ### From existing runs When adding runs to a LangSmith dataset, attachments can be selectively propagated from the source run to the destination example. -To do learn more, please see [this guide](./../datasets/manage_datasets_in_application#add-runs-from-the-tracing-project-ui). +To learn more, please see [this guide](./manage_datasets_in_application#add-runs-from-the-tracing-project-ui). 
### From the LangSmith UI @@ -193,7 +207,7 @@ The target function must have two positional arguments in order to consume the a } } ``` -::: + ::: :::tip Javascript Target Function with Attachments ::: @@ -208,31 +222,31 @@ from openai import OpenAI client = wrap_openai(OpenAI()) # Define target function that uses attachments -def file_qa(inputs, attachments): - # Read the audio bytes from the reader and encode them in base64 - audio_reader = attachments["my_wav"]["reader"] - audio_b64 = base64.b64encode(audio_reader.read()).decode('utf-8') - audio_completion = client.chat.completions.create( - model="gpt-4o-audio-preview", - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": inputs["audio_question"] - }, - { - "type": "input_audio", - "input_audio": { - "data": audio_b64, - "format": "wav" - } - } - ] - }, - ] - ) + +def file_qa(inputs, attachments): # Read the audio bytes from the reader and encode them in base64 +audio_reader = attachments["my_wav"]["reader"] +audio_b64 = base64.b64encode(audio_reader.read()).decode('utf-8') +audio_completion = client.chat.completions.create( +model="gpt-4o-audio-preview", +messages=[ +{ +"role": "user", +"content": [ +{ +"type": "text", +"text": inputs["audio_question"] +}, +{ +"type": "input_audio", +"input_audio": { +"data": audio_b64, +"format": "wav" +} +} +] +}, +] +) # Most models support taking in an image URL directly in addition to base64 encoded images # You can pipe the image pre-signed URL directly to the model @@ -259,6 +273,7 @@ def file_qa(inputs, attachments): "audio_answer": audio_completion.choices[0].message.content, "image_answer": image_completion.choices[0].message.content, } + `), TypeScriptBlock(`import OpenAI from "openai"; import { wrapOpenAI } from "langsmith/wrappers"; @@ -266,66 +281,65 @@ import { wrapOpenAI } from "langsmith/wrappers"; const client: any = wrapOpenAI(new OpenAI()); async function fileQA(inputs: Record, config?: Record) { - const presignedUrl = 
config?.attachments?.["my_wav"]?.presigned_url; - if (!presignedUrl) { - throw new Error("No presigned URL provided for audio."); - } +const presignedUrl = config?.attachments?.["my_wav"]?.presigned_url; +if (!presignedUrl) { +throw new Error("No presigned URL provided for audio."); +} - const response = await fetch(presignedUrl); - if (!response.ok) { - throw new Error(\`Failed to fetch audio: $\{response.statusText\}\`); - } +const response = await fetch(presignedUrl); +if (!response.ok) { +throw new Error(\`Failed to fetch audio: $\{response.statusText\}\`); +} - const arrayBuffer = await response.arrayBuffer(); - const uint8Array = new Uint8Array(arrayBuffer); - const audioB64 = Buffer.from(uint8Array).toString("base64"); - - - const audioCompletion = await client.chat.completions.create({ - model: "gpt-4o-audio-preview", - messages: [ - { - role: "user", - content: [ - { type: "text", text: inputs["audio_question"] }, - { - type: "input_audio", - input_audio: { - data: audioB64, - format: "wav", - }, - }, - ], - }, - ], - }); - - const imageUrl = config?.attachments?.["my_img"]?.presigned_url - const imageCompletion = await client.chat.completions.create({ - model: "gpt-4o-mini", - messages: [ - { - role: "user", - content: [ - { type: "text", text: inputs["image_question"] }, - { - type: "image_url", - image_url: { - url: imageUrl, - }, - }, - ], - }, - ], - }); - - return { - audio_answer: audioCompletion.choices[0].message.content, - image_answer: imageCompletion.choices[0].message.content, - }; +const arrayBuffer = await response.arrayBuffer(); +const uint8Array = new Uint8Array(arrayBuffer); +const audioB64 = Buffer.from(uint8Array).toString("base64"); + +const audioCompletion = await client.chat.completions.create({ +model: "gpt-4o-audio-preview", +messages: [ +{ +role: "user", +content: [ +{ type: "text", text: inputs["audio_question"] }, +{ +type: "input_audio", +input_audio: { +data: audioB64, +format: "wav", +}, +}, +], +}, +], +}); + +const imageUrl 
= config?.attachments?.["my_img"]?.presigned_url +const imageCompletion = await client.chat.completions.create({ +model: "gpt-4o-mini", +messages: [ +{ +role: "user", +content: [ +{ type: "text", text: inputs["image_question"] }, +{ +type: "image_url", +image_url: { +url: imageUrl, +}, +}, +], +}, +], +}); + +return { +audio_answer: audioCompletion.choices[0].message.content, +image_answer: imageCompletion.choices[0].message.content, +}; }`), - ]} - groupId="client-language" +]} +groupId="client-language" /> ### Define custom evaluators with attachments @@ -338,7 +352,7 @@ In addition to using attachments inside of your target function, you can also us from pydantic import BaseModel def valid_image_description(outputs: dict, attachments: dict) -> bool: - """Use an LLM to judge if the reasoning and the answer are consistent.""" +"""Use an LLM to judge if the reasoning and the answer are consistent.""" instructions = """ Does the description of the following image make sense? @@ -369,9 +383,9 @@ def valid_image_description(outputs: dict, attachments: dict) -> bool: return response.choices[0].message.parsed.description_is_valid langsmith_client.evaluate( - file_qa, - data=dataset_name, - evaluators=[valid_image_description], +file_qa, +data=dataset_name, +evaluators=[valid_image_description], ) `), TypeScriptBlock(`import { zodResponseFormat } from 'openai/helpers/zod'; @@ -379,18 +393,18 @@ import { z } from 'zod'; import { evaluate } from "langsmith/evaluation"; const DescriptionResponse = z.object({ - description_is_valid: z.boolean(), +description_is_valid: z.boolean(), }); async function validImageDescription({ - outputs, - attachments, +outputs, +attachments, }: { - outputs?: any; - attachments?: any; +outputs?: any; +attachments?: any; }): Promise<{ key: string; score: boolean}> { - const instructions = \`Does the description of the following image make sense? 
- Please carefully review the image and the description to determine if the description is valid.\`; +const instructions = \`Does the description of the following image make sense? +Please carefully review the image and the description to determine if the description is valid.\`; const imageUrl = attachments?.["my_img"]?.presigned_url @@ -414,17 +428,18 @@ async function validImageDescription({ const score: boolean = completion.choices[0]?.message?.parsed?.description_is_valid ?? false; return { key: "valid_image_description", score }; + } const resp = await evaluate(fileQA, { - data: datasetName, - // Need to pass flag to include attachments - includeAttachments: true, - evaluators: [validImageDescription], - client: langsmithClient +data: datasetName, +// Need to pass flag to include attachments +includeAttachments: true, +evaluators: [validImageDescription], +client: langsmithClient });`), - ]} - groupId="client-language" +]} +groupId="client-language" /> ## Managing datasets with attachments @@ -465,35 +480,35 @@ example_update = ExampleUpdateWithAttachments( langsmith_client.update_example_with_attachments(dataset_id=dataset.id, updates=[example_update]) `), TypeScriptBlock(`import { ExampleUpdateWithAttachments } from "langsmith/schemas"; - + const exampleUpdate: ExampleUpdateWithAttachments = { - id: exampleId, - attachments: { - // These are net new attachments - "my_new_file": { - mimeType: "text/plain", - data: Buffer.from("foo bar") - }, - }, - attachments_operations: { - // Retained attachments will stay exactly the same - retain: ["my_img"], - // Renaming attachments preserves the original data - rename: { - "my_wav": "my_new_wav", - }, - // Any attachments not in rename/retain will be deleted - // In this case, that would be "my_pdf" - }, +id: exampleId, +attachments: { +// These are net new attachments +"my_new_file": { +mimeType: "text/plain", +data: Buffer.from("foo bar") +}, +}, +attachments_operations: { +// Retained attachments will stay exactly 
the same +retain: ["my_img"], +// Renaming attachments preserves the original data +rename: { +"my_wav": "my_new_wav", +}, +// Any attachments not in rename/retain will be deleted +// In this case, that would be "my_pdf" +}, }; await langsmithClient.updateExamplesMultipart( - dataset.id, - [exampleUpdate], +dataset.id, +[exampleUpdate], ); `), ]} - groupId="client-language" +groupId="client-language" /> :::warning Attachment Operations @@ -512,4 +527,4 @@ When editing an example in the UI, you can upload new attachments, rename and de and there is also a quick reset button to restore the attachments to what previously existed on the example. No changes will be saved until you click submit. -![](./static/attachment_editing.gif) \ No newline at end of file +![](./static/attachment_editing.gif) diff --git a/docs/evaluation/how_to_guides/index.md b/docs/evaluation/how_to_guides/index.md index 687c951f..d1cf7968 100644 --- a/docs/evaluation/how_to_guides/index.md +++ b/docs/evaluation/how_to_guides/index.md @@ -44,7 +44,6 @@ Evaluate and improve your application before deploying it. - [Handle model rate limits](./how_to_guides/rate_limiting) - [Print detailed logs (Python only)](../../observability/how_to_guides/tracing/output_detailed_logs) - [Run an evaluation locally (beta, Python only)](./how_to_guides/local) ->>>>>>> main ## Unit testing