From f6d5056bd45ed8e988fb97be321a8b3996d31790 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Fri, 27 Dec 2024 16:16:28 -0800 Subject: [PATCH] Make default withStructuredOutput method for OpenAI json_schema --- .../docs/how_to/graph_constructing.ipynb | 2 +- .../docs/how_to/query_high_cardinality.ipynb | 4 +- .../integration_openai_wsa_json_schema.ts | 2 +- .../models/chat/integration_openai_wsa_zod.ts | 2 +- examples/src/tools/duckduckgo_search_agent.ts | 2 +- libs/langchain-openai/src/chat_models.ts | 38 ++++----- .../tests/chat_models.standard.int.test.ts | 2 +- .../chat_models_structured_output.int.test.ts | 79 ++++++++++++------- 8 files changed, 79 insertions(+), 52 deletions(-) diff --git a/docs/core_docs/docs/how_to/graph_constructing.ipynb b/docs/core_docs/docs/how_to/graph_constructing.ipynb index dfee57df8191..2df65144e104 100644 --- a/docs/core_docs/docs/how_to/graph_constructing.ipynb +++ b/docs/core_docs/docs/how_to/graph_constructing.ipynb @@ -102,7 +102,7 @@ "\n", "const model = new ChatOpenAI({\n", " temperature: 0,\n", - " model: \"gpt-4-turbo-preview\",\n", + " model: \"gpt-4o-mini\",\n", "});\n", "\n", "const llmGraphTransformer = new LLMGraphTransformer({\n", diff --git a/docs/core_docs/docs/how_to/query_high_cardinality.ipynb b/docs/core_docs/docs/how_to/query_high_cardinality.ipynb index 589f4e2cc933..804a5ebc7e58 100644 --- a/docs/core_docs/docs/how_to/query_high_cardinality.ipynb +++ b/docs/core_docs/docs/how_to/query_high_cardinality.ipynb @@ -392,7 +392,7 @@ "metadata": {}, "source": [ "```{=mdx}\n", - "\n", + "\n", "```" ] }, @@ -635,4 +635,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/examples/src/models/chat/integration_openai_wsa_json_schema.ts b/examples/src/models/chat/integration_openai_wsa_json_schema.ts index d21bc3a1d490..2370cbdcf79f 100644 --- a/examples/src/models/chat/integration_openai_wsa_json_schema.ts +++ b/examples/src/models/chat/integration_openai_wsa_json_schema.ts @@ -3,7 
+3,7 @@ import { ChatOpenAI } from "@langchain/openai"; const model = new ChatOpenAI({ temperature: 0, - model: "gpt-4-turbo-preview", + model: "gpt-4o-mini", }); const calculatorSchema = { diff --git a/examples/src/models/chat/integration_openai_wsa_zod.ts b/examples/src/models/chat/integration_openai_wsa_zod.ts index 64d83b00b6ad..3b29af39cdb8 100644 --- a/examples/src/models/chat/integration_openai_wsa_zod.ts +++ b/examples/src/models/chat/integration_openai_wsa_zod.ts @@ -4,7 +4,7 @@ import { z } from "zod"; const model = new ChatOpenAI({ temperature: 0, - model: "gpt-4-turbo-preview", + model: "gpt-4o-mini", }); const calculatorSchema = z.object({ diff --git a/examples/src/tools/duckduckgo_search_agent.ts b/examples/src/tools/duckduckgo_search_agent.ts index 90b0d549ac12..f2a29ed4f25f 100644 --- a/examples/src/tools/duckduckgo_search_agent.ts +++ b/examples/src/tools/duckduckgo_search_agent.ts @@ -15,7 +15,7 @@ const prompt = await pull( "hwchase17/openai-functions-agent" ); const llm = new ChatOpenAI({ - model: "gpt-4-turbo-preview", + model: "gpt-4o-mini", temperature: 0, }); const agent = await createOpenAIFunctionsAgent({ diff --git a/libs/langchain-openai/src/chat_models.ts b/libs/langchain-openai/src/chat_models.ts index 3bd3a4e5ee57..91023c5561b6 100644 --- a/libs/langchain-openai/src/chat_models.ts +++ b/libs/langchain-openai/src/chat_models.ts @@ -2035,24 +2035,7 @@ export class ChatOpenAI< } else { outputParser = new JsonOutputParser(); } - } else if (method === "jsonSchema") { - llm = this.bind({ - response_format: { - type: "json_schema", - json_schema: { - name: name ?? "extract", - description: schema.description, - schema, - strict: config?.strict, - }, - }, - } as Partial); - if (isZodSchema(schema)) { - outputParser = StructuredOutputParser.fromZodSchema(schema); - } else { - outputParser = new JsonOutputParser(); - } - } else { + } else if (method === "tool_calling") { let functionName = name ?? 
"extract"; // Is function calling if (isZodSchema(schema)) { @@ -2120,6 +2103,25 @@ export class ChatOpenAI< keyName: functionName, }); } + } else { + let finalSchema = schema; + if (!isZodSchema(schema)) { + if (schema.parameters !== undefined) { + finalSchema = schema.parameters; + } + } + llm = this.bind({ + response_format: { + type: "json_schema", + json_schema: { + name: name ?? "extract", + description: schema.description, + schema: finalSchema, + strict: config?.strict ?? true, + }, + }, + } as Partial); + outputParser = new JsonOutputParser(); } if (!includeRaw) { diff --git a/libs/langchain-openai/src/tests/chat_models.standard.int.test.ts b/libs/langchain-openai/src/tests/chat_models.standard.int.test.ts index 1dd34509ed04..32993f949f14 100644 --- a/libs/langchain-openai/src/tests/chat_models.standard.int.test.ts +++ b/libs/langchain-openai/src/tests/chat_models.standard.int.test.ts @@ -26,7 +26,7 @@ class ChatOpenAIStandardIntegrationTests extends ChatModelIntegrationTests< chatModelHasStructuredOutput: true, supportsParallelToolCalls: true, constructorArgs: { - model: "gpt-3.5-turbo", + model: "gpt-4o-mini", }, }); } diff --git a/libs/langchain-openai/src/tests/chat_models_structured_output.int.test.ts b/libs/langchain-openai/src/tests/chat_models_structured_output.int.test.ts index 36590c636d36..96ca0a989b8a 100644 --- a/libs/langchain-openai/src/tests/chat_models_structured_output.int.test.ts +++ b/libs/langchain-openai/src/tests/chat_models_structured_output.int.test.ts @@ -6,10 +6,10 @@ import { test, expect, describe, it } from "@jest/globals"; import { concat } from "@langchain/core/utils/stream"; import { ChatOpenAI } from "../chat_models.js"; -test("withStructuredOutput zod schema function calling", async () => { +test("withStructuredOutput with zod schema", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const calculatorSchema = z.object({ @@ -65,10 +65,44 @@ 
test("withStructuredOutput with o1", async () => { expect("number2" in result).toBe(true); }); +test("withStructuredOutput with optional properties", async () => { + const model = new ChatOpenAI({ + model: "o1", + }); + + const calculatorSchema = z.object({ + operation: z.enum(["add", "subtract", "multiply", "divide"]), + number1: z.number(), + number2: z.number(), + number3: z.nullable(z.number()), + }); + const modelWithStructuredOutput = model.withStructuredOutput( + calculatorSchema, + { + name: "calculator", + } + ); + + const prompt = ChatPromptTemplate.fromMessages([ + [ + "developer", + "You are VERY bad at math and must always use a calculator. Do not supply any additional numbers.", + ], + ["human", "Please help me!! What is 2 + 2?"], + ]); + const chain = prompt.pipe(modelWithStructuredOutput); + const result = await chain.invoke({}); + console.log(result); + expect("operation" in result).toBe(true); + expect("number1" in result).toBe(true); + expect("number2" in result).toBe(true); + expect(result.number3).toBe(null); +}); + test("withStructuredOutput zod schema streaming", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const calculatorSchema = z.object({ @@ -91,6 +125,7 @@ test("withStructuredOutput zod schema streaming", async () => { const stream = await chain.stream({}); const chunks = []; for await (const chunk of stream) { + console.log(chunk); chunks.push(chunk); } expect(chunks.length).toBeGreaterThan(1); @@ -103,7 +138,7 @@ test("withStructuredOutput zod schema streaming", async () => { test("withStructuredOutput zod schema JSON mode", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const calculatorSchema = z.object({ @@ -139,10 +174,10 @@ Respond with a JSON object containing three keys: expect("number2" in result).toBe(true); }); -test("withStructuredOutput JSON schema function
calling", async () => { +test("withStructuredOutput with JSON schema", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const calculatorSchema = z.object({ @@ -167,10 +202,10 @@ test("withStructuredOutput JSON schema function calling", async () => { expect("number2" in result).toBe(true); }); -test("withStructuredOutput OpenAI function definition function calling", async () => { +test("withStructuredOutput OpenAI with function definition", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const calculatorSchema = z.object({ @@ -198,7 +233,7 @@ test("withStructuredOutput OpenAI function definition function calling", async ( test("withStructuredOutput JSON schema JSON mode", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const calculatorSchema = z.object({ @@ -237,7 +272,7 @@ Respond with a JSON object containing three keys: test("withStructuredOutput JSON schema", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const jsonSchema = { @@ -252,6 +287,8 @@ test("withStructuredOutput JSON schema", async () => { number1: { type: "number" }, number2: { type: "number" }, }, + required: ["operation", "number1", "number2"], + additionalProperties: false, }; const modelWithStructuredOutput = model.withStructuredOutput(jsonSchema); @@ -278,7 +315,7 @@ Respond with a JSON object containing three keys: test("withStructuredOutput includeRaw true", async () => { const model = new ChatOpenAI({ temperature: 0, - modelName: "gpt-4-turbo-preview", + modelName: "gpt-4o-mini", }); const calculatorSchema = z.object({ @@ -318,22 +355,10 @@ test("withStructuredOutput includeRaw true", async () => { throw new Error("raw not in result"); } const { raw } = result as { raw: 
AIMessage }; - expect(raw.additional_kwargs.tool_calls?.length).toBeGreaterThan(0); - expect(raw.additional_kwargs.tool_calls?.[0].function.name).toBe( - "calculator" - ); - expect( - "operation" in - JSON.parse(raw.additional_kwargs.tool_calls?.[0].function.arguments ?? "") - ).toBe(true); - expect( - "number1" in - JSON.parse(raw.additional_kwargs.tool_calls?.[0].function.arguments ?? "") - ).toBe(true); - expect( - "number2" in - JSON.parse(raw.additional_kwargs.tool_calls?.[0].function.arguments ?? "") - ).toBe(true); + console.log(raw); + expect("operation" in JSON.parse(raw.content as string)).toBe(true); + expect("number1" in JSON.parse(raw.content as string)).toBe(true); + expect("number2" in JSON.parse(raw.content as string)).toBe(true); }); test("parallelToolCalls param", async () => {