docs(cost-tracking): support arbitrary usage types #1129

Merged 2 commits on Dec 20, 2024

Changes from all commits
2 changes: 1 addition & 1 deletion components-mdx/get-started-python-decorator-any-llm.mdx
@@ -22,7 +22,7 @@ def anthropic_completion(**kwargs):
# See docs for more details on token counts and usd cost in Langfuse
# https://langfuse.com/docs/model-usage-and-cost
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": response.usage.input_tokens,
"output": response.usage.output_tokens
}
2 changes: 1 addition & 1 deletion cookbook/integration_amazon_bedrock.ipynb
@@ -210,7 +210,7 @@
" response_text = response[\"output\"][\"message\"][\"content\"][0][\"text\"]\n",
" langfuse_context.update_current_observation(\n",
" output=response_text,\n",
" usage={\n",
" usage_details={\n",
" \"input\": response[\"usage\"][\"inputTokens\"],\n",
" \"output\": response[\"usage\"][\"outputTokens\"],\n",
" \"total\": response[\"usage\"][\"totalTokens\"]\n",
2 changes: 1 addition & 1 deletion cookbook/integration_dspy.ipynb
@@ -233,7 +233,7 @@
" output=o_content,\n",
" name=name,\n",
" metadata=kwargs,\n",
" usage=o.usage,\n",
" usage_details=o.usage,\n",
" model=o.model\n",
" )\n",
"\n",
2 changes: 1 addition & 1 deletion cookbook/integration_google_vertex_and_gemini.ipynb
@@ -199,7 +199,7 @@
" langfuse_context.update_current_observation(\n",
" input=input,\n",
" model=model_name,\n",
" usage={\n",
" usage_details={\n",
" \"input\": response.usage_metadata.prompt_token_count,\n",
" \"output\": response.usage_metadata.candidates_token_count,\n",
" \"total\": response.usage_metadata.total_token_count\n",
8 changes: 4 additions & 4 deletions cookbook/integration_mistral_sdk.ipynb
@@ -159,7 +159,7 @@
"\n",
" # Log the usage details and output content after the LLM call\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": res.usage.prompt_tokens,\n",
" \"output\": res.usage.completion_tokens\n",
" },\n",
@@ -447,7 +447,7 @@
"\n",
" if chunk.data.choices[0].finish_reason == \"stop\":\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": chunk.data.usage.prompt_tokens,\n",
" \"output\": chunk.data.usage.completion_tokens\n",
" },\n",
@@ -553,7 +553,7 @@
" res = await mistral_client.chat.complete_async(**kwargs)\n",
"\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": res.usage.prompt_tokens,\n",
" \"output\": res.usage.completion_tokens\n",
" },\n",
@@ -717,7 +717,7 @@
"\n",
" if chunk.data.choices[0].finish_reason == \"stop\":\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": chunk.data.usage.prompt_tokens,\n",
" \"output\": chunk.data.usage.completion_tokens\n",
" },\n",
2 changes: 1 addition & 1 deletion cookbook/integration_openai_assistants.ipynb
@@ -219,7 +219,7 @@
" trace_id=langfuse_context.get_current_trace_id(),\n",
" parent_observation_id=langfuse_context.get_current_observation_id(),\n",
" model=run.model,\n",
" usage=run.usage,\n",
" usage_details=run.usage,\n",
" input=input_messages,\n",
" output=assistant_response\n",
" )\n",
2 changes: 1 addition & 1 deletion cookbook/js_langfuse_sdk.ipynb
@@ -200,7 +200,7 @@
"// Example end - sets endTime, optionally pass a body\n",
"generation.end({\n",
" output: chatCompletion.content[0].text,\n",
" usage: {\n",
" usageDetails: {\n",
" input: chatCompletion.usage.input_tokens,\n",
" output: chatCompletion.usage.output_tokens,\n",
" },\n",
2 changes: 1 addition & 1 deletion cookbook/python_decorators.ipynb
@@ -241,7 +241,7 @@
" # See docs for more details on token counts and usd cost in Langfuse\n",
" # https://langfuse.com/docs/model-usage-and-cost\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": response.usage.input_tokens,\n",
" \"output\": response.usage.output_tokens\n",
" }\n",
3 changes: 2 additions & 1 deletion cookbook/python_sdk_low_level.ipynb
@@ -402,7 +402,8 @@
"| model_parameters | object | yes | The parameters of the model used for the generation; can be any key-value pairs.\n",
"| input | object | yes | The prompt used for the generation. Can be any string or JSON object.\n",
"| output | string | yes | The completion generated by the model. Can be any string or JSON object.\n",
"| usage | object | yes | The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `\"TOKENS\"`, `\"CHARACTERS\"`, `\"MILLISECONDS\"`, `\"SECONDS\"`, or `\"IMAGES\"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.\n",
"| usage_details | object | yes | The usage object supports arbitrary usage types with their units of consumption. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.\n",
"| cost_details | object | yes | The cost object supports arbitrary cost types with their units of consumption. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.\n",
"| metadata | object | yes | Additional metadata of the generation. Can be any JSON object. Metadata is merged when being updated via the API.\n",
"| level | string | yes | The level of the generation. Can be `DEBUG`, `DEFAULT`, `WARNING` or `ERROR`. Used for sorting/filtering of traces with elevated error levels and for highlighting in the UI.\n",
"| status_message | string | yes | The status message of the generation. Additional field for context of the event. E.g. the error message of an error event.\n",
17 changes: 17 additions & 0 deletions pages/changelog/2024-12-20-improved-cost-tracking.mdx
@@ -0,0 +1,17 @@
---
date: 2024-12-20
title: Improved cost tracking
description: Langfuse now supports cost tracking for all usage types such as cached tokens, audio tokens, and reasoning tokens.
author: Hassieb
ogImage: /images/changelog/2024-12-20-improved-cost-tracking.png
---

import { ChangelogHeader } from "@/components/changelog/ChangelogHeader";

<ChangelogHeader />

LLMs have grown more powerful by supporting multi-modal generations, reasoning, and caching. As LLM usage pricing departs from a simple input/output token count, we are excited that Langfuse now supports cost tracking for arbitrary usage types. Generation costs are now accurately calculated and displayed in the UI.

In the Langfuse UI, you can now create LLM model definitions with prices for arbitrary usage types. When ingesting generations, you can provide the units consumed for each usage type. Langfuse will then calculate the cost for each generation.
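
For illustration, a minimal sketch of such an ingestion with the Python decorator SDK; the Anthropic response shape and the `cache_read_input_tokens` key are assumptions, and any usage type that has a price on your model definition works the same way:

```python
from anthropic import Anthropic
from langfuse.decorators import langfuse_context, observe

anthropic_client = Anthropic()

@observe(as_type="generation")
def anthropic_completion(**kwargs):
    response = anthropic_client.messages.create(**kwargs)
    langfuse_context.update_current_observation(
        model=kwargs.get("model"),
        usage_details={
            "input": response.usage.input_tokens,
            "output": response.usage.output_tokens,
            # assumed arbitrary usage type; use whatever usage types
            # your Langfuse model definition defines prices for
            "cache_read_input_tokens": getattr(
                response.usage, "cache_read_input_tokens", 0
            ) or 0,
        },
    )
    return response.content[0].text
```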

**Learn more about [cost tracking with Langfuse](/docs/model-usage-and-cost)**
2 changes: 1 addition & 1 deletion pages/docs/integrations/amazon-bedrock.md
@@ -134,7 +134,7 @@ def wrapped_bedrock_converse(**kwargs):
response_text = response["output"]["message"]["content"][0]["text"]
langfuse_context.update_current_observation(
output=response_text,
- usage={
+ usage_details={
"input": response["usage"]["inputTokens"],
"output": response["usage"]["outputTokens"],
"total": response["usage"]["totalTokens"]
2 changes: 1 addition & 1 deletion pages/docs/integrations/dspy.md
@@ -123,7 +123,7 @@ class CustomTracker(LangfuseTracker):
output=o_content,
name=name,
metadata=kwargs,
- usage=o.usage,
+ usage_details=o.usage,
model=o.model
)

2 changes: 1 addition & 1 deletion pages/docs/integrations/google-vertex-ai.md
@@ -123,7 +123,7 @@ def vertex_generate_content(input, model_name = "gemini-pro"):
langfuse_context.update_current_observation(
input=input,
model=model_name,
- usage={
+ usage_details={
"input": response.usage_metadata.prompt_token_count,
"output": response.usage_metadata.candidates_token_count,
"total": response.usage_metadata.total_token_count
8 changes: 4 additions & 4 deletions pages/docs/integrations/mistral-sdk.md
@@ -100,7 +100,7 @@ def mistral_completion(**kwargs):

# Log the usage details and output content after the LLM call
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": res.usage.prompt_tokens,
"output": res.usage.completion_tokens
},
@@ -235,7 +235,7 @@ def stream_mistral_completion(**kwargs):

if chunk.data.choices[0].finish_reason == "stop":
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": chunk.data.usage.prompt_tokens,
"output": chunk.data.usage.completion_tokens
},
@@ -349,7 +349,7 @@ async def async_mistral_completion(**kwargs):
res = await mistral_client.chat.complete_async(**kwargs)

langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": res.usage.prompt_tokens,
"output": res.usage.completion_tokens
},
@@ -427,7 +427,7 @@ async def async_stream_mistral_completion(**kwargs):

if chunk.data.choices[0].finish_reason == "stop":
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": chunk.data.usage.prompt_tokens,
"output": chunk.data.usage.completion_tokens
},
2 changes: 1 addition & 1 deletion pages/docs/integrations/openai/python/assistants-api.md
@@ -141,7 +141,7 @@ def get_response(thread_id, run_id):
trace_id=langfuse_context.get_current_trace_id(),
parent_observation_id=langfuse_context.get_current_observation_id(),
model=run.model,
- usage=run.usage,
+ usage_details=run.usage,
input=input_messages,
output=assistant_response
)