diff --git a/.github/workflows/standard-tests.yml b/.github/workflows/standard-tests.yml index 221837ef7448..cdcfd7bbf91c 100644 --- a/.github/workflows/standard-tests.yml +++ b/.github/workflows/standard-tests.yml @@ -6,25 +6,8 @@ on: - cron: '0 13 * * *' jobs: - get-changed-files: - runs-on: ubuntu-latest - outputs: - changed_files: ${{ steps.get_changes.outputs.changed_files }} - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - name: Get changes - id: get_changes - run: | - echo "changed_files<<EOF" >> $GITHUB_OUTPUT - git diff --name-only -r HEAD^1 HEAD | while read line; do printf "%s\n" "$line"; done >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT - standard-tests: runs-on: ubuntu-latest - needs: get-changed-files strategy: matrix: package: [anthropic, cohere, google-genai, groq, mistralai] @@ -52,8 +35,6 @@ jobs: # we need separate jobs for each test. standard-tests-openai: runs-on: ubuntu-latest - needs: get-changed-files - if: contains(needs.get-changed-files.outputs.changed_files, 'langchain-core/') || contains(needs.get-changed-files.outputs.changed_files, 'libs/langchain-openai/') steps: - uses: actions/checkout@v4 - name: Use Node.js 18.x @@ -72,8 +53,6 @@ jobs: standard-tests-azure-openai: runs-on: ubuntu-latest - needs: get-changed-files - if: contains(needs.get-changed-files.outputs.changed_files, 'langchain-core/') || contains(needs.get-changed-files.outputs.changed_files, 'libs/langchain-openai/') steps: - uses: actions/checkout@v4 - name: Use Node.js 18.x @@ -95,8 +74,6 @@ jobs: standard-tests-bedrock: runs-on: ubuntu-latest - needs: get-changed-files - if: contains(needs.get-changed-files.outputs.changed_files, 'langchain-core/') || contains(needs.get-changed-files.outputs.changed_files, 'libs/langchain-community/') steps: - uses: actions/checkout@v4 - name: Use Node.js 18.x diff --git a/docs/core_docs/docs/concepts.mdx b/docs/core_docs/docs/concepts.mdx index 56078f972c73..5a310bb33189 100644 --- 
a/docs/core_docs/docs/concepts.mdx +++ b/docs/core_docs/docs/concepts.mdx @@ -144,7 +144,7 @@ Chat models support the assignment of distinct roles to conversation messages, h Although the underlying models are messages in, message out, the LangChain wrappers also allow these models to take a string as input. This gives them the same interface as LLMs (and simpler to use). -When a string is passed in as input, it will be converted to a HumanMessage under the hood before being passed to the underlying model. +When a string is passed in as input, it will be converted to a `HumanMessage` under the hood before being passed to the underlying model. LangChain does not host any Chat Models, rather we rely on third party integrations. @@ -751,7 +751,105 @@ You can roughly think of it as an iterator over callback events (though the form See [this guide](/docs/how_to/streaming/#using-stream-events) for more detailed information on how to use `.streamEvents()`. -### Function/tool calling +### Structured output + +LLMs are capable of generating arbitrary text. This enables the model to respond appropriately to a wide +range of inputs, but for some use-cases, it can be useful to constrain the LLM's output +to a specific format or structure. This is referred to as **structured output**. + +For example, if the output is to be stored in a relational database, +it is much easier if the model generates output that adheres to a defined schema or format. +[Extracting specific information](/docs/tutorials/extraction/) from unstructured text is another +case where this is particularly useful. Most commonly, the output format will be JSON, +though other formats such as [XML](/docs/how_to/output_parser_xml/) can be useful too. Below, we'll discuss +a few ways to get structured output from models in LangChain. + +#### `.withStructuredOutput()` + +For convenience, some LangChain chat models support a `.withStructuredOutput()` method. 
+This method only requires a schema as input, and returns an object matching the requested schema. +Generally, this method is only present on models that support one of the more advanced methods described below, +and will use one of them under the hood. It takes care of importing a suitable output parser and +formatting the schema in the right format for the model. + +For more information, check out this [how-to guide](/docs/how_to/structured_output/#the-.withstructuredoutput-method). + +#### Raw prompting + +The most intuitive way to get a model to structure output is to ask nicely. +In addition to your query, you can give instructions describing what kind of output you'd like, then +parse the output using an [output parser](/docs/concepts/#output-parsers) to convert the raw +model message or string output into something more easily manipulated. + +The biggest benefit to raw prompting is its flexibility: + +- Raw prompting does not require any special model features, only sufficient reasoning capability to understand + the passed schema. +- You can prompt for any format you'd like, not just JSON. This can be useful if the model you + are using is more heavily trained on a certain type of data, such as XML or YAML. + +However, there are some drawbacks too: + +- LLMs are non-deterministic, and prompting an LLM to consistently output data in exactly the correct format + for smooth parsing can be surprisingly difficult and model-specific. +- Individual models have quirks depending on the data they were trained on, and optimizing prompts can be quite difficult. + Some may be better at interpreting [JSON schema](https://json-schema.org/), others may be best with TypeScript definitions, + and still others may prefer XML. + +While we'll next go over some ways that you can take advantage of features offered by +model providers to increase reliability, prompting techniques remain important for tuning your +results no matter what method you choose. 
+ +#### JSON mode + + + +Some models, such as [Mistral](/docs/integrations/chat/mistral/), [OpenAI](/docs/integrations/chat/openai/), +[Together AI](/docs/integrations/chat/togetherai/) and [Ollama](/docs/integrations/chat/ollama/), +support a feature called **JSON mode**, usually enabled via config. + +When enabled, JSON mode will constrain the model's output to always be some sort of valid JSON. +Often they require some custom prompting, but it's usually much less burdensome and along the lines of, +`"you must always return JSON"`, and the [output is easier to parse](/docs/how_to/output_parser_json/). + +It's also generally simpler and more commonly available than tool calling. + +Here's an example: + +```ts +import { JsonOutputParser } from "@langchain/core/output_parsers"; +import { ChatPromptTemplate } from "@langchain/core/prompts"; +import { ChatOpenAI } from "@langchain/openai"; + +const model = new ChatOpenAI({ + model: "gpt-4o", + modelKwargs: { + response_format: { type: "json_object" }, + }, +}); + +const TEMPLATE = `Answer the user's question to the best of your ability. +You must always output a JSON object with an "answer" key and a "followup_question" key. + +{question}`; + +const prompt = ChatPromptTemplate.fromTemplate(TEMPLATE); + +const chain = prompt.pipe(model).pipe(new JsonOutputParser()); + +await chain.invoke({ question: "What is the powerhouse of the cell?" }); +``` + +``` +{ + answer: "The powerhouse of the cell is the mitochondrion.", + followup_question: "Would you like to learn more about the functions of mitochondria?" +} +``` + +For a full list of model providers that support JSON mode, see [this table](/docs/integrations/chat/). + +#### Function/tool calling :::info We use the term tool calling interchangeably with function calling. 
Although @@ -769,8 +867,10 @@ from unstructured text, you could give the model an "extraction" tool that takes parameters matching the desired schema, then treat the generated output as your final result. -A tool call includes a name, arguments dict, and an optional identifier. The -arguments dict is structured `{argument_name: argument_value}`. +For models that support it, tool calling can be very convenient. It removes the +guesswork around how best to prompt schemas in favor of a built-in model feature. It can also +more naturally support agentic flows, since you can just pass multiple tool schemas instead +of fiddling with enums or unions. Many LLM providers, including [Anthropic](https://www.anthropic.com/), [Cohere](https://cohere.com/), [Google](https://cloud.google.com/vertex-ai), @@ -787,14 +887,16 @@ LangChain provides a standardized interface for tool calling that is consistent The standard interface consists of: -- `ChatModel.bindTools()`: a method for specifying which tools are available for a model to call. +- `ChatModel.bindTools()`: a method for specifying which tools are available for a model to call. This method accepts [LangChain tools](/docs/concepts/#tools). - `AIMessage.toolCalls`: an attribute on the `AIMessage` returned from the model for accessing the tool calls requested by the model. -There are two main use cases for function/tool calling: +The following how-to guides are good practical resources for using function/tool calling: - [How to return structured data from an LLM](/docs/how_to/structured_output/) - [How to use a model to call tools](/docs/how_to/tool_calling/) +For a full list of model providers that support tool calling, [see this table](/docs/integrations/chat/). + ### Retrieval LangChain provides several advanced retrieval types. 
A full list is below, along with the following information: diff --git a/docs/core_docs/docs/how_to/structured_output.ipynb b/docs/core_docs/docs/how_to/structured_output.ipynb index 56dfa24f315f..2173bb92ecfa 100644 --- a/docs/core_docs/docs/how_to/structured_output.ipynb +++ b/docs/core_docs/docs/how_to/structured_output.ipynb @@ -16,6 +16,9 @@ "metadata": {}, "source": [ "# How to return structured data from a model\n", + "```{=mdx}\n", + "\n", + "```\n", "\n", "It is often useful to have a model return output that matches some specific schema. One common use-case is extracting data from arbitrary text to insert into a traditional database or use with some other downstrem system. This guide will show you a few different strategies you can use to do this.\n", "\n", diff --git a/docs/core_docs/docs/integrations/chat/index.mdx b/docs/core_docs/docs/integrations/chat/index.mdx index c73f748e0bd0..8daf811e1dab 100644 --- a/docs/core_docs/docs/integrations/chat/index.mdx +++ b/docs/core_docs/docs/integrations/chat/index.mdx @@ -1,6 +1,7 @@ --- sidebar_position: 1 sidebar_class_name: hidden +hide_table_of_contents: true --- # Chat models @@ -11,36 +12,33 @@ All ChatModels implement the Runnable interface, which comes with default implem - _Streaming_ support defaults to returning an `AsyncIterator` of a single value, the final result returned by the underlying ChatModel provider. This obviously doesn't give you token-by-token streaming, which requires native support from the ChatModel provider, but ensures your code that expects an iterator of tokens can work for any of our ChatModel integrations. - _Batch_ support defaults to calling the underlying ChatModel in parallel for each input. The concurrency can be controlled with the `maxConcurrency` key in `RunnableConfig`. -- _Map_ support defaults to calling `.invoke` across all instances of the array which it was called on. 
Each ChatModel integration can optionally provide native implementations to truly enable invoke, streaming or batching requests. Additionally, some chat models support additional ways of guaranteeing structure in their outputs by allowing you to pass in a defined schema. -[Function calling and parallel function calling](/docs/how_to/tool_calling) (tool calling) are two common ones, and those capabilities allow you to use the chat model as the LLM in certain types of agents. +[Tool calling](/docs/how_to/tool_calling) is one such capability, and it allows you to use the chat model as the LLM in certain types of agents. Some models in LangChain have also implemented a `withStructuredOutput()` method that unifies many of these different ways of constraining output to a schema. The table shows, for each integration, which features have been implemented with native support. Yellow circles (🟡) indicates partial support - for example, if the model supports tool calling but not tool messages for agents. 
-| Model | Invoke | Stream | Batch | Function Calling | Tool Calling | `withStructuredOutput()` | -| :---------------------- | :----: | :----: | :---: | :--------------: | :-------------------------: | :----------------------: | -| BedrockChat | ✅ | ✅ | ✅ | ❌ | 🟡 (Bedrock Anthropic only) | ❌ | -| ChatAlibabaTongyi | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | -| ChatAnthropic | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | -| ChatBaiduWenxin | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | -| ChatCloudflareWorkersAI | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatCohere | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatFireworks | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| ChatGoogleGenerativeAI | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatGoogleVertexAI | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatVertexAI | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | -| ChatGooglePaLM | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | -| ChatGroq | ✅ | ✅ | ✅ | ❌ | 🟡 | ✅ | -| ChatLlamaCpp | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatMinimax | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | -| ChatMistralAI | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | -| ChatOllama | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatOpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| ChatTencentHunyuan | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatTogetherAI | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ChatYandexGPT | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | -| ChatZhipuAI | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | +| Model | Stream | JSON mode | [Tool Calling](/docs/how_to/tool_calling/) | [`withStructuredOutput()`](/docs/how_to/structured_output/#the-.withstructuredoutput-method) | [Multimodal](/docs/how_to/multimodal_inputs/) | +| :---------------------- | :----: | :-------: | :----------------------------------------: | :------------------------------------------------------------------------------------------: | :-------------------------------------------: | +| BedrockChat | ✅ | ❌ | 🟡 (Bedrock Anthropic only) | ❌ | ❌ | +| ChatAlibabaTongyi | ❌ | ❌ | ❌ | ❌ | ❌ | +| ChatAnthropic | ✅ | ❌ | ✅ | ✅ | ✅ | +| ChatBaiduWenxin | ❌ | ❌ | ❌ | ❌ | ❌ | +| ChatCloudflareWorkersAI | ✅ | ❌ | ❌ | ❌ | ❌ | +| ChatCohere | ✅ | ❌ | ❌ | ❌ | ❌ | +| ChatFireworks | ✅ | ✅ | ✅ | ❌ | ❌ | +| ChatGoogleGenerativeAI | ✅ | ❌ | ❌ | ❌ | ✅ | +| 
ChatVertexAI | ✅ | ❌ | ✅ | ✅ | ✅ | +| ChatGroq | ✅ | ✅ | 🟡 | ✅ | ❌ | +| ChatLlamaCpp | ✅ | ❌ | ❌ | ❌ | ❌ | +| ChatMinimax | ❌ | ❌ | ❌ | ❌ | ❌ | +| ChatMistralAI | ❌ | ✅ | ✅ | ✅ | ❌ | +| ChatOllama | ✅ | ✅ | ❌ | ❌ | ❌ | +| ChatOpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | +| ChatTencentHunyuan | ✅ | ❌ | ❌ | ❌ | ❌ | +| ChatTogetherAI | ✅ | ✅ | ❌ | ❌ | ❌ | +| ChatYandexGPT | ❌ | ❌ | ❌ | ❌ | ❌ | +| ChatZhipuAI | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/core_docs/docs/tutorials/agents.mdx b/docs/core_docs/docs/tutorials/agents.mdx index 26085a14bdb2..3dad19ed2029 100644 --- a/docs/core_docs/docs/tutorials/agents.mdx +++ b/docs/core_docs/docs/tutorials/agents.mdx @@ -83,9 +83,7 @@ const vectorstore = await MemoryVectorStore.fromDocuments( ); const retriever = vectorstore.asRetriever(); -const retrieverResult = await retriever.getRelevantDocuments( - "how to upload a dataset" -); +const retrieverResult = await retriever.invoke("how to upload a dataset"); console.log(retrieverResult[0]); /* diff --git a/docs/core_docs/docs/tutorials/rag.ipynb b/docs/core_docs/docs/tutorials/rag.ipynb index 483190981af6..8db0dd70f6d1 100644 --- a/docs/core_docs/docs/tutorials/rag.ipynb +++ b/docs/core_docs/docs/tutorials/rag.ipynb @@ -136,7 +136,7 @@ " outputParser: new StringOutputParser(),\n", "})\n", "\n", - "const retrievedDocs = await retriever.getRelevantDocuments(\"what is task decomposition\")" + "const retrievedDocs = await retriever.invoke(\"what is task decomposition\")" ] }, { @@ -817,7 +817,7 @@ " prompt: customRagPrompt,\n", " outputParser: new StringOutputParser(),\n", "})\n", - "const context = await retriever.getRelevantDocuments(\"what is task decomposition\");\n", + "const context = await retriever.invoke(\"what is task decomposition\");\n", "\n", "await ragChain.invoke({\n", " question: \"What is Task Decomposition?\",\n", diff --git a/examples/src/retrievers/parent_document_retriever_rerank.ts b/examples/src/retrievers/parent_document_retriever_rerank.ts index 11726aae3396..f0918b602a67 
100644 --- a/examples/src/retrievers/parent_document_retriever_rerank.ts +++ b/examples/src/retrievers/parent_document_retriever_rerank.ts @@ -69,9 +69,7 @@ await retriever.addDocuments(docs); // This will search for documents in vector store and return for LLM already reranked and sorted document // with appropriate minimum relevance score -const retrievedDocs = await retriever.getRelevantDocuments( - "What is Pam's favorite color?" -); +const retrievedDocs = await retriever.invoke("What is Pam's favorite color?"); // Pam's favorite color is returned first! console.log(JSON.stringify(retrievedDocs, null, 2)); diff --git a/examples/src/retrievers/qdrant_self_query.ts b/examples/src/retrievers/qdrant_self_query.ts index 44f5ab85d39f..becc23b62096 100644 --- a/examples/src/retrievers/qdrant_self_query.ts +++ b/examples/src/retrievers/qdrant_self_query.ts @@ -118,16 +118,16 @@ const selfQueryRetriever = SelfQueryRetriever.fromLLM({ * We can also ask questions like "Which movies are either comedy or drama and are less than 90 minutes?". * The retriever will automatically convert these questions into queries that can be used to retrieve documents. */ -const query1 = await selfQueryRetriever.getRelevantDocuments( +const query1 = await selfQueryRetriever.invoke( "Which movies are less than 90 minutes?" ); -const query2 = await selfQueryRetriever.getRelevantDocuments( +const query2 = await selfQueryRetriever.invoke( "Which movies are rated higher than 8.5?" ); -const query3 = await selfQueryRetriever.getRelevantDocuments( +const query3 = await selfQueryRetriever.invoke( "Which cool movies are directed by Greta Gerwig?" ); -const query4 = await selfQueryRetriever.getRelevantDocuments( +const query4 = await selfQueryRetriever.invoke( "Which movies are either comedy or drama and are less than 90 minutes?" 
); console.log(query1, query2, query3, query4); diff --git a/libs/langchain-mistralai/package.json b/libs/langchain-mistralai/package.json index 59f8750e145a..c089b7dc552c 100644 --- a/libs/langchain-mistralai/package.json +++ b/libs/langchain-mistralai/package.json @@ -35,7 +35,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/core": ">0.1.56 <0.3.0", + "@langchain/core": ">=0.2.5 <0.3.0", "@mistralai/mistralai": "^0.4.0", "uuid": "^9.0.0", "zod": "^3.22.4", diff --git a/libs/langchain-mistralai/src/chat_models.ts b/libs/langchain-mistralai/src/chat_models.ts index a86589bd8dfd..9ea8803a343e 100644 --- a/libs/langchain-mistralai/src/chat_models.ts +++ b/libs/langchain-mistralai/src/chat_models.ts @@ -8,6 +8,7 @@ import { ChatRequest, Tool as MistralAITool, Message as MistralAIMessage, + TokenUsage as MistralAITokenUsage, } from "@mistralai/mistralai"; import { MessageType, @@ -80,6 +81,11 @@ interface MistralAICallOptions }; tools: StructuredToolInterface[] | MistralAIToolInput[] | MistralAITool[]; tool_choice?: MistralAIToolChoice; + /** + * Whether or not to include token usage in the stream. + * @default {true} + */ + streamUsage?: boolean; } export interface ChatMistralAICallOptions extends MistralAICallOptions {} @@ -87,7 +93,9 @@ export interface ChatMistralAICallOptions extends MistralAICallOptions {} /** * Input to chat model class. */ -export interface ChatMistralAIInput extends BaseChatModelParams { +export interface ChatMistralAIInput + extends BaseChatModelParams, + Pick<ChatMistralAICallOptions, "streamUsage"> { /** * The API key to use. 
* @default {process.env.MISTRAL_API_KEY} @@ -216,7 +224,8 @@ function convertMessagesToMistralMessages( } function mistralAIResponseToChatMessage( - choice: ChatCompletionResponse["choices"][0] + choice: ChatCompletionResponse["choices"][0], + usage?: MistralAITokenUsage ): BaseMessage { const { message } = choice; // MistralAI SDK does not include tool_calls in the non @@ -254,6 +263,13 @@ function mistralAIResponseToChatMessage( })) : undefined, }, + usage_metadata: usage + ? { + input_tokens: usage.prompt_tokens, + output_tokens: usage.completion_tokens, + total_tokens: usage.total_tokens, + } + : undefined, }); } default: @@ -261,12 +277,27 @@ function mistralAIResponseToChatMessage( } } -function _convertDeltaToMessageChunk(delta: { - role?: string | undefined; - content?: string | undefined; - tool_calls?: MistralAIToolCalls[] | undefined; -}) { +function _convertDeltaToMessageChunk( + delta: { + role?: string | undefined; + content?: string | undefined; + tool_calls?: MistralAIToolCalls[] | undefined; + }, + usage?: MistralAITokenUsage | null +) { if (!delta.content && !delta.tool_calls) { + if (usage) { + return new AIMessageChunk({ + content: "", + usage_metadata: usage + ? { + input_tokens: usage.prompt_tokens, + output_tokens: usage.completion_tokens, + total_tokens: usage.total_tokens, + } + : undefined, + }); + } return null; } // Our merge additional kwargs util function will throw unless there @@ -313,6 +344,13 @@ function _convertDeltaToMessageChunk(delta: { content, tool_call_chunks: toolCallChunks, additional_kwargs, + usage_metadata: usage + ? { + input_tokens: usage.prompt_tokens, + output_tokens: usage.completion_tokens, + total_tokens: usage.total_tokens, + } + : undefined, }); } else if (role === "tool") { return new ToolMessageChunk({ @@ -389,6 +427,8 @@ export class ChatMistralAI< lc_serializable = true; + streamUsage = true; + constructor(fields?: ChatMistralAIInput) { super(fields ?? {}); const apiKey = fields?.apiKey ?? 
getEnvironmentVariable("MISTRAL_API_KEY"); @@ -409,6 +449,7 @@ export class ChatMistralAI< this.seed = this.randomSeed; this.modelName = fields?.model ?? fields?.modelName ?? this.model; this.model = this.modelName; + this.streamUsage = fields?.streamUsage ?? this.streamUsage; } getLsParams(options: this["ParsedCallOptions"]): LangSmithParams { @@ -600,7 +641,7 @@ export class ChatMistralAI< const text = part.message?.content ?? ""; const generation: ChatGeneration = { text, - message: mistralAIResponseToChatMessage(part), + message: mistralAIResponseToChatMessage(part, response?.usage), }; if (part.finish_reason) { generation.generationInfo = { finish_reason: part.finish_reason }; @@ -643,7 +684,12 @@ export class ChatMistralAI< prompt: 0, completion: choice.index ?? 0, }; - const message = _convertDeltaToMessageChunk(delta); + // An explicit per-call `streamUsage` takes precedence over the instance-level default. + const shouldStreamUsage = options.streamUsage ?? this.streamUsage; + const message = _convertDeltaToMessageChunk( + delta, + shouldStreamUsage ? data.usage : null + ); if (message === null) { // Do not yield a chunk if the message is empty continue; diff --git a/libs/langchain-mistralai/src/tests/chat_models.int.test.ts b/libs/langchain-mistralai/src/tests/chat_models.int.test.ts index 827eaa08db03..b052f2c3e30d 100644 --- a/libs/langchain-mistralai/src/tests/chat_models.int.test.ts +++ b/libs/langchain-mistralai/src/tests/chat_models.int.test.ts @@ -11,6 +11,7 @@ import { DynamicStructuredTool, StructuredTool } from "@langchain/core/tools"; import { z } from "zod"; import { AIMessage, + AIMessageChunk, BaseMessage, HumanMessage, ToolMessage, @@ -916,3 +917,68 @@ describe("codestral-latest", () => { console.log(parsedArgs.code); }); }); + +test("Stream token count usage_metadata", async () => { + const model = new ChatMistralAI({ + model: "codestral-latest", + temperature: 0, + }); + let res: AIMessageChunk | null = null; + for await (const chunk of await model.stream( + "Why is the sky blue? Be concise." 
+ )) { + if (!res) { + res = chunk; + } else { + res = res.concat(chunk); + } + } + console.log(res); + expect(res?.usage_metadata).toBeDefined(); + if (!res?.usage_metadata) { + return; + } + expect(res.usage_metadata.input_tokens).toBe(13); + expect(res.usage_metadata.output_tokens).toBeGreaterThan(10); + expect(res.usage_metadata.total_tokens).toBe( + res.usage_metadata.input_tokens + res.usage_metadata.output_tokens + ); +}); + +test("streamUsage excludes token usage", async () => { + const model = new ChatMistralAI({ + model: "codestral-latest", + temperature: 0, + streamUsage: false, + }); + let res: AIMessageChunk | null = null; + for await (const chunk of await model.stream( + "Why is the sky blue? Be concise." + )) { + if (!res) { + res = chunk; + } else { + res = res.concat(chunk); + } + } + console.log(res); + expect(res?.usage_metadata).not.toBeDefined(); +}); + +test("Invoke token count usage_metadata", async () => { + const model = new ChatMistralAI({ + model: "codestral-latest", + temperature: 0, + }); + const res = await model.invoke("Why is the sky blue? 
Be concise."); + console.log(res); + expect(res?.usage_metadata).toBeDefined(); + if (!res?.usage_metadata) { + return; + } + expect(res.usage_metadata.input_tokens).toBe(13); + expect(res.usage_metadata.output_tokens).toBeGreaterThan(10); + expect(res.usage_metadata.total_tokens).toBe( + res.usage_metadata.input_tokens + res.usage_metadata.output_tokens + ); +}); diff --git a/libs/langchain-mistralai/src/tests/chat_models.standard.int.test.ts b/libs/langchain-mistralai/src/tests/chat_models.standard.int.test.ts index 0d80e46fccdb..248b3892f727 100644 --- a/libs/langchain-mistralai/src/tests/chat_models.standard.int.test.ts +++ b/libs/langchain-mistralai/src/tests/chat_models.standard.int.test.ts @@ -23,22 +23,6 @@ class ChatMistralAIStandardIntegrationTests extends ChatModelIntegrationTests< functionId: "123456789", }); } - - async testUsageMetadataStreaming() { - this.skipTestMessage( - "testUsageMetadataStreaming", - "ChatMistralAI", - "Streaming tokens is not currently supported." - ); - } - - async testUsageMetadata() { - this.skipTestMessage( - "testUsageMetadata", - "ChatMistralAI", - "Usage metadata tokens is not currently supported." - ); - } } const testClass = new ChatMistralAIStandardIntegrationTests(); diff --git a/yarn.lock b/yarn.lock index 2d33838f76e7..0c64972dcfd1 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10444,7 +10444,7 @@ __metadata: resolution: "@langchain/mistralai@workspace:libs/langchain-mistralai" dependencies: "@jest/globals": ^29.5.0 - "@langchain/core": ">0.1.56 <0.3.0" + "@langchain/core": ">=0.2.5 <0.3.0" "@langchain/scripts": ~0.0.14 "@langchain/standard-tests": 0.0.0 "@mistralai/mistralai": ^0.4.0