docs[patch]: Update token usage tracking and response metadata docs (#5125)

* Update token usage tracking and response metadata docs

* Fix

* Update
jacoblee93 authored Apr 17, 2024
1 parent 60a4d66 commit 8262ae9
Showing 6 changed files with 309 additions and 43 deletions.
3 changes: 2 additions & 1 deletion deno.json
@@ -2,9 +2,10 @@
"imports": {
"langchain/": "npm:/langchain/",
"@faker-js/faker": "npm:@faker-js/faker",
"@langchain/anthropic": "npm:@langchain/anthropic",
"@langchain/anthropic": "npm:@langchain/anthropic@0.1.16",
"@langchain/community/": "npm:/@langchain/community/",
"@langchain/openai": "npm:@langchain/[email protected]",
"@langchain/google-vertexai-web": "npm:@langchain/[email protected]",
"@langchain/mistralai": "npm:@langchain/[email protected]",
"@langchain/core/": "npm:/@langchain/[email protected]/",
"@langchain/pinecone": "npm:@langchain/pinecone",
200 changes: 200 additions & 0 deletions docs/core_docs/docs/modules/model_io/chat/response_metadata.ipynb
@@ -0,0 +1,200 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Response metadata\n",
"\n",
"Many model providers include some metadata in their chat generation responses. This metadata can be accessed via the `AIMessage.response_metadata` attribute. Depending on the model provider and model configuration, this can contain information like [token counts](/docs/modules/model_io/chat/token_usage_tracking/) and more.\n",
"\n",
"Here’s what the response metadata looks like for a few different providers:\n",
"\n",
"## OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" tokenUsage: { completionTokens: 164, promptTokens: 17, totalTokens: 181 },\n",
" finish_reason: \"stop\"\n",
"}\n"
]
}
],
"source": [
"import { ChatOpenAI } from \"@langchain/openai\";\n",
"\n",
"const chatModel = new ChatOpenAI({ model: \"gpt-4-turbo\" });\n",
"const message = await chatModel.invoke([\n",
" [\"human\", \"What's the oldest known example of cuneiform\"],\n",
"]);\n",
"\n",
"console.log(message.response_metadata);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Anthropic"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" id: \"msg_01K8kC9wskG6qsSGRmY7b3kj\",\n",
" model: \"claude-3-sonnet-20240229\",\n",
" stop_sequence: null,\n",
" usage: { input_tokens: 17, output_tokens: 355 },\n",
" stop_reason: \"end_turn\"\n",
"}\n"
]
}
],
"source": [
"import { ChatAnthropic } from \"@langchain/anthropic\";\n",
"\n",
"const chatModel = new ChatAnthropic({ model: \"claude-3-sonnet-20240229\" });\n",
"const message = await chatModel.invoke([\n",
" [\"human\", \"What's the oldest known example of cuneiform\"],\n",
"]);\n",
"\n",
"console.log(message.response_metadata);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Google VertexAI"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" usage_metadata: {\n",
" prompt_token_count: undefined,\n",
" candidates_token_count: undefined,\n",
" total_token_count: undefined\n",
" },\n",
" safety_ratings: [\n",
" {\n",
" category: \"HARM_CATEGORY_HATE_SPEECH\",\n",
" probability: \"NEGLIGIBLE\",\n",
" probability_score: 0.027480692,\n",
" severity: \"HARM_SEVERITY_NEGLIGIBLE\",\n",
" severity_score: 0.073430054\n",
" },\n",
" {\n",
" category: \"HARM_CATEGORY_DANGEROUS_CONTENT\",\n",
" probability: \"NEGLIGIBLE\",\n",
" probability_score: 0.055412795,\n",
" severity: \"HARM_SEVERITY_NEGLIGIBLE\",\n",
" severity_score: 0.112405084\n",
" },\n",
" {\n",
" category: \"HARM_CATEGORY_HARASSMENT\",\n",
" probability: \"NEGLIGIBLE\",\n",
" probability_score: 0.055720285,\n",
" severity: \"HARM_SEVERITY_NEGLIGIBLE\",\n",
" severity_score: 0.020844316\n",
" },\n",
" {\n",
" category: \"HARM_CATEGORY_SEXUALLY_EXPLICIT\",\n",
" probability: \"NEGLIGIBLE\",\n",
" probability_score: 0.05223086,\n",
" severity: \"HARM_SEVERITY_NEGLIGIBLE\",\n",
" severity_score: 0.14891148\n",
" }\n",
" ],\n",
" finish_reason: undefined\n",
"}\n"
]
}
],
"source": [
"import { ChatVertexAI } from \"@langchain/google-vertexai-web\";\n",
"\n",
"const chatModel = new ChatVertexAI({ model: \"gemini-pro\" });\n",
"const message = await chatModel.invoke([\n",
" [\"human\", \"What's the oldest known example of cuneiform\"],\n",
"]);\n",
"\n",
"console.log(message.response_metadata);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MistralAI"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" tokenUsage: { completionTokens: 166, promptTokens: 19, totalTokens: 185 },\n",
" finish_reason: \"stop\"\n",
"}\n"
]
}
],
"source": [
"import { ChatMistralAI } from \"@langchain/mistralai\";\n",
"\n",
"const chatModel = new ChatMistralAI({ model: \"mistral-tiny\" });\n",
"const message = await chatModel.invoke([\n",
" [\"human\", \"What's the oldest known example of cuneiform\"],\n",
"]);\n",
"\n",
"console.log(message.response_metadata);"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Deno",
"language": "typescript",
"name": "deno"
},
"language_info": {
"file_extension": ".ts",
"mimetype": "text/x.typescript",
"name": "typescript",
"nb_converter": "script",
"pygments_lexer": "typescript",
"version": "5.3.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
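
A note on the notebook above: each provider shapes `response_metadata` differently. OpenAI and MistralAI report a camelCase `tokenUsage` object, while Anthropic reports a snake_case `usage` object. A small helper can normalize the two shapes. The following is a minimal sketch based only on the example outputs above; `normalizeUsage` and its return type are illustrative and not part of any LangChain package:

```typescript
// Illustrative helper, not a LangChain API: normalize token counts
// from the two response_metadata shapes shown in the notebook above.
interface NormalizedUsage {
  inputTokens?: number;
  outputTokens?: number;
}

function normalizeUsage(metadata: Record<string, any>): NormalizedUsage {
  // OpenAI / MistralAI shape: { tokenUsage: { promptTokens, completionTokens, totalTokens } }
  if (metadata.tokenUsage) {
    return {
      inputTokens: metadata.tokenUsage.promptTokens,
      outputTokens: metadata.tokenUsage.completionTokens,
    };
  }
  // Anthropic shape: { usage: { input_tokens, output_tokens } }
  if (metadata.usage) {
    return {
      inputTokens: metadata.usage.input_tokens,
      outputTokens: metadata.usage.output_tokens,
    };
  }
  // Other providers (e.g. the VertexAI output above) may need their own branch.
  return {};
}
```

For the OpenAI response above, `normalizeUsage(message.response_metadata)` would yield `{ inputTokens: 17, outputTokens: 164 }`.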
26 changes: 23 additions & 3 deletions docs/core_docs/docs/modules/model_io/chat/token_usage_tracking.mdx
@@ -4,9 +4,12 @@ sidebar_position: 5

 # Tracking token usage
 
-This notebook goes over how to track your token usage for specific calls. This is currently only implemented for the OpenAI API.
+This notebook goes over how to track your token usage for specific calls.
 
-Here's an example of tracking token usage for a single Chat model call:
+## Using AIMessage.response_metadata
+
+A number of model providers return token usage information as part of the chat generation response. When available, this is included in the [AIMessage.response_metadata](/docs/modules/model_io/chat/response_metadata/) field.
+
+Here's an example with OpenAI:
 
 import CodeBlock from "@theme/CodeBlock";
 import Example from "@examples/models/chat/token_usage_tracking.ts";
@@ -21,4 +24,21 @@ npm install @langchain/openai
 
 <CodeBlock language="typescript">{Example}</CodeBlock>
 
-If this model is passed to a chain or agent that calls it multiple times, it will log an output each time.
+And here's an example with Anthropic:
+
+import AnthropicExample from "@examples/models/chat/token_usage_tracking_anthropic.ts";
+
+```bash npm2yarn
+npm install @langchain/anthropic
+```
+
+<CodeBlock language="typescript">{AnthropicExample}</CodeBlock>
+
+## Using callbacks
+
+You can also use the `handleLLMEnd` callback to get the full output from the LLM, including token usage for supported models.
+Here's an example of how you could do that:
+
+import CallbackExample from "@examples/models/chat/token_usage_tracking_callback.ts";
+
+<CodeBlock language="typescript">{CallbackExample}</CodeBlock>
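
Since `handleLLMEnd` fires once per model call, the same hook can keep a running total across a chain or agent that calls the model several times. Here is a minimal sketch, assuming the provider populates `llmOutput.tokenUsage` as in the OpenAI callback example in this commit; the mutable counter is illustrative:

```typescript
import { ChatOpenAI } from "@langchain/openai";

// Illustrative running total, not a LangChain API.
let totalTokens = 0;

const chatModel = new ChatOpenAI({
  model: "gpt-4-turbo",
  callbacks: [
    {
      handleLLMEnd(output) {
        // For OpenAI, counts appear under llmOutput.tokenUsage
        // (see the full callback output further below).
        totalTokens += output.llmOutput?.tokenUsage?.totalTokens ?? 0;
      },
    },
  ],
});

await chatModel.invoke("Tell me a joke.");
await chatModel.invoke("Tell me another joke.");

console.log(`Total tokens across both calls: ${totalTokens}`);
```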
45 changes: 6 additions & 39 deletions examples/src/models/chat/token_usage_tracking.ts
@@ -1,49 +1,16 @@
 import { ChatOpenAI } from "@langchain/openai";
 
 const chatModel = new ChatOpenAI({
-  model: "gpt-4",
-  callbacks: [
-    {
-      handleLLMEnd(output) {
-        console.log(JSON.stringify(output, null, 2));
-      },
-    },
-  ],
+  model: "gpt-4-turbo",
 });
 
-await chatModel.invoke("Tell me a joke.");
+const res = await chatModel.invoke("Tell me a joke.");
+
+console.log(res.response_metadata);
 
 /*
 {
-  "generations": [
-    [
-      {
-        "text": "Why don't scientists trust atoms?\n\nBecause they make up everything!",
-        "message": {
-          "lc": 1,
-          "type": "constructor",
-          "id": [
-            "langchain_core",
-            "messages",
-            "AIMessage"
-          ],
-          "kwargs": {
-            "content": "Why don't scientists trust atoms?\n\nBecause they make up everything!",
-            "additional_kwargs": {}
-          }
-        },
-        "generationInfo": {
-          "finish_reason": "stop"
-        }
-      }
-    ]
-  ],
-  "llmOutput": {
-    "tokenUsage": {
-      "completionTokens": 13,
-      "promptTokens": 12,
-      "totalTokens": 25
-    }
-  }
+  tokenUsage: { completionTokens: 15, promptTokens: 12, totalTokens: 27 },
+  finish_reason: 'stop'
 }
 */
19 changes: 19 additions & 0 deletions examples/src/models/chat/token_usage_tracking_anthropic.ts
@@ -0,0 +1,19 @@
import { ChatAnthropic } from "@langchain/anthropic";

const chatModel = new ChatAnthropic({
model: "claude-3-sonnet-20240229",
});

const res = await chatModel.invoke("Tell me a joke.");

console.log(res.response_metadata);

/*
{
id: 'msg_017Mgz6HdgNbi3cwL1LNB9Dw',
model: 'claude-3-sonnet-20240229',
stop_sequence: null,
usage: { input_tokens: 12, output_tokens: 30 },
stop_reason: 'end_turn'
}
*/
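
Note that Anthropic reports input and output tokens separately rather than a combined total. If you want a figure comparable to OpenAI's `totalTokens`, you could sum the two fields yourself, continuing from `res` above:

```typescript
// 12 + 30 = 42 total tokens for the call above
const { input_tokens, output_tokens } = res.response_metadata.usage;
console.log(input_tokens + output_tokens);
```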
59 changes: 59 additions & 0 deletions examples/src/models/chat/token_usage_tracking_callback.ts
@@ -0,0 +1,59 @@
import { ChatOpenAI } from "@langchain/openai";

const chatModel = new ChatOpenAI({
model: "gpt-4-turbo",
callbacks: [
{
handleLLMEnd(output) {
console.log(JSON.stringify(output, null, 2));
},
},
],
});

await chatModel.invoke("Tell me a joke.");

/*
{
"generations": [
[
{
"text": "Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!",
"message": {
"lc": 1,
"type": "constructor",
"id": [
"langchain_core",
"messages",
"AIMessage"
],
"kwargs": {
"content": "Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!",
"tool_calls": [],
"invalid_tool_calls": [],
"additional_kwargs": {},
"response_metadata": {
"tokenUsage": {
"completionTokens": 17,
"promptTokens": 12,
"totalTokens": 29
},
"finish_reason": "stop"
}
}
},
"generationInfo": {
"finish_reason": "stop"
}
}
]
],
"llmOutput": {
"tokenUsage": {
"completionTokens": 17,
"promptTokens": 12,
"totalTokens": 29
}
}
}
*/
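If you only need the counts rather than the full serialized generation, the handler could read `llmOutput.tokenUsage` directly. A small variation on the example above, assuming the same output shape:

```typescript
import { ChatOpenAI } from "@langchain/openai";

const chatModel = new ChatOpenAI({
  model: "gpt-4-turbo",
  callbacks: [
    {
      handleLLMEnd(output) {
        // Same shape as the full dump above; log just the token counts.
        console.log(output.llmOutput?.tokenUsage);
        // -> { completionTokens: 17, promptTokens: 12, totalTokens: 29 }
      },
    },
  ],
});

await chatModel.invoke("Tell me a joke.");
```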
