docs(cost-tracking): support arbitrary usage types #1129

Merged 2 commits on Dec 20, 2024

Changes from all commits
2 changes: 1 addition & 1 deletion components-mdx/get-started-python-decorator-any-llm.mdx
@@ -22,7 +22,7 @@ def anthropic_completion(**kwargs):
# See docs for more details on token counts and usd cost in Langfuse
# https://langfuse.com/docs/model-usage-and-cost
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": response.usage.input_tokens,
"output": response.usage.output_tokens
}
2 changes: 1 addition & 1 deletion cookbook/integration_amazon_bedrock.ipynb
@@ -210,7 +210,7 @@
" response_text = response[\"output\"][\"message\"][\"content\"][0][\"text\"]\n",
" langfuse_context.update_current_observation(\n",
" output=response_text,\n",
" usage={\n",
" usage_details={\n",
" \"input\": response[\"usage\"][\"inputTokens\"],\n",
" \"output\": response[\"usage\"][\"outputTokens\"],\n",
" \"total\": response[\"usage\"][\"totalTokens\"]\n",
2 changes: 1 addition & 1 deletion cookbook/integration_dspy.ipynb
@@ -233,7 +233,7 @@
" output=o_content,\n",
" name=name,\n",
" metadata=kwargs,\n",
" usage=o.usage,\n",
" usage_details=o.usage,\n",
" model=o.model\n",
" )\n",
"\n",
2 changes: 1 addition & 1 deletion cookbook/integration_google_vertex_and_gemini.ipynb
@@ -199,7 +199,7 @@
" langfuse_context.update_current_observation(\n",
" input=input,\n",
" model=model_name,\n",
" usage={\n",
" usage_details={\n",
" \"input\": response.usage_metadata.prompt_token_count,\n",
" \"output\": response.usage_metadata.candidates_token_count,\n",
" \"total\": response.usage_metadata.total_token_count\n",
8 changes: 4 additions & 4 deletions cookbook/integration_mistral_sdk.ipynb
@@ -159,7 +159,7 @@
"\n",
" # Log the usage details and output content after the LLM call\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": res.usage.prompt_tokens,\n",
" \"output\": res.usage.completion_tokens\n",
" },\n",
@@ -447,7 +447,7 @@
"\n",
" if chunk.data.choices[0].finish_reason == \"stop\":\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": chunk.data.usage.prompt_tokens,\n",
" \"output\": chunk.data.usage.completion_tokens\n",
" },\n",
@@ -553,7 +553,7 @@
" res = await mistral_client.chat.complete_async(**kwargs)\n",
"\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": res.usage.prompt_tokens,\n",
" \"output\": res.usage.completion_tokens\n",
" },\n",
@@ -717,7 +717,7 @@
"\n",
" if chunk.data.choices[0].finish_reason == \"stop\":\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": chunk.data.usage.prompt_tokens,\n",
" \"output\": chunk.data.usage.completion_tokens\n",
" },\n",
2 changes: 1 addition & 1 deletion cookbook/integration_openai_assistants.ipynb
@@ -219,7 +219,7 @@
" trace_id=langfuse_context.get_current_trace_id(),\n",
" parent_observation_id=langfuse_context.get_current_observation_id(),\n",
" model=run.model,\n",
" usage=run.usage,\n",
" usage_details=run.usage,\n",
" input=input_messages,\n",
" output=assistant_response\n",
" )\n",
2 changes: 1 addition & 1 deletion cookbook/js_langfuse_sdk.ipynb
@@ -200,7 +200,7 @@
"// Example end - sets endTime, optionally pass a body\n",
"generation.end({\n",
" output: chatCompletion.content[0].text,\n",
" usage: {\n",
" usageDetails: {\n",
" input: chatCompletion.usage.input_tokens,\n",
" output: chatCompletion.usage.output_tokens,\n",
" },\n",
2 changes: 1 addition & 1 deletion cookbook/python_decorators.ipynb
@@ -241,7 +241,7 @@
" # See docs for more details on token counts and usd cost in Langfuse\n",
" # https://langfuse.com/docs/model-usage-and-cost\n",
" langfuse_context.update_current_observation(\n",
" usage={\n",
" usage_details={\n",
" \"input\": response.usage.input_tokens,\n",
" \"output\": response.usage.output_tokens\n",
" }\n",
3 changes: 2 additions & 1 deletion cookbook/python_sdk_low_level.ipynb
@@ -402,7 +402,8 @@
"| model_parameters | object | yes | The parameters of the model used for the generation; can be any key-value pairs.\n",
"| input | object | yes | The prompt used for the generation. Can be any string or JSON object.\n",
"| output | string | yes | The completion generated by the model. Can be any string or JSON object.\n",
"| usage | object | yes | The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `\"TOKENS\"`, `\"CHARACTERS\"`, `\"MILLISECONDS\"`, `\"SECONDS\"`, or `\"IMAGES\"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.\n",
"| usage_details | object | yes | The usage object supports arbitrary usage types with their units of consumption. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.\n",
"| cost_details | object | yes | The cost object supports arbitrary cost types with their units of consumption. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.\n",
"| metadata | object | yes | Additional metadata of the generation. Can be any JSON object. Metadata is merged when being updated via the API.\n",
"| level | string | yes | The level of the generation. Can be `DEBUG`, `DEFAULT`, `WARNING` or `ERROR`. Used for sorting/filtering of traces with elevated error levels and for highlighting in the UI.\n",
"| status_message | string | yes | The status message of the generation. Additional field for context of the event. E.g. the error message of an error event.\n",
17 changes: 17 additions & 0 deletions pages/changelog/2024-12-20-improved-cost-tracking.mdx
@@ -0,0 +1,17 @@
---
date: 2024-12-20
title: Improved cost tracking
description: Langfuse now supports cost tracking for all usage types such as cached tokens, audio tokens, and reasoning tokens.
author: Hassieb
ogImage: /images/changelog/2024-12-20-improved-cost-tracking.png
---

import { ChangelogHeader } from "@/components/changelog/ChangelogHeader";

<ChangelogHeader />

LLMs have grown more powerful by supporting multi-modal generations, reasoning, and caching. As LLM usage pricing departs from a simple input/output token count, we are excited that Langfuse now supports cost tracking for arbitrary usage types. Generation costs are now accurately calculated and displayed in the UI.

In the Langfuse UI, you can now create LLM model definitions with prices for arbitrary usage types. When ingesting generations, you can provide the units consumed for each usage type. Langfuse will then calculate the cost for each generation.
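
For illustration, a minimal sketch of such an ingestion with the Python decorator SDK; the Anthropic response shape and the `cache_read_input_tokens` key are assumptions, and any usage type that has a price on your model definition works the same way:

```python
from anthropic import Anthropic
from langfuse.decorators import langfuse_context, observe

anthropic_client = Anthropic()

@observe(as_type="generation")
def anthropic_completion(**kwargs):
    response = anthropic_client.messages.create(**kwargs)
    langfuse_context.update_current_observation(
        model=kwargs.get("model"),
        usage_details={
            "input": response.usage.input_tokens,
            "output": response.usage.output_tokens,
            # assumed arbitrary usage type; use whatever usage types
            # your Langfuse model definition defines prices for
            "cache_read_input_tokens": getattr(
                response.usage, "cache_read_input_tokens", 0
            ) or 0,
        },
    )
    return response.content[0].text
```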

**Learn more about [cost tracking with Langfuse](/docs/model-usage-and-cost)**
2 changes: 1 addition & 1 deletion pages/docs/integrations/amazon-bedrock.md
@@ -134,7 +134,7 @@ def wrapped_bedrock_converse(**kwargs):
response_text = response["output"]["message"]["content"][0]["text"]
langfuse_context.update_current_observation(
output=response_text,
- usage={
+ usage_details={
"input": response["usage"]["inputTokens"],
"output": response["usage"]["outputTokens"],
"total": response["usage"]["totalTokens"]
2 changes: 1 addition & 1 deletion pages/docs/integrations/dspy.md
@@ -123,7 +123,7 @@ class CustomTracker(LangfuseTracker):
output=o_content,
name=name,
metadata=kwargs,
- usage=o.usage,
+ usage_details=o.usage,
model=o.model
)

2 changes: 1 addition & 1 deletion pages/docs/integrations/google-vertex-ai.md
@@ -123,7 +123,7 @@ def vertex_generate_content(input, model_name = "gemini-pro"):
langfuse_context.update_current_observation(
input=input,
model=model_name,
- usage={
+ usage_details={
"input": response.usage_metadata.prompt_token_count,
"output": response.usage_metadata.candidates_token_count,
"total": response.usage_metadata.total_token_count
8 changes: 4 additions & 4 deletions pages/docs/integrations/mistral-sdk.md
@@ -100,7 +100,7 @@ def mistral_completion(**kwargs):

# Log the usage details and output content after the LLM call
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": res.usage.prompt_tokens,
"output": res.usage.completion_tokens
},
@@ -235,7 +235,7 @@ def stream_mistral_completion(**kwargs):

if chunk.data.choices[0].finish_reason == "stop":
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": chunk.data.usage.prompt_tokens,
"output": chunk.data.usage.completion_tokens
},
@@ -349,7 +349,7 @@ async def async_mistral_completion(**kwargs):
res = await mistral_client.chat.complete_async(**kwargs)

langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": res.usage.prompt_tokens,
"output": res.usage.completion_tokens
},
@@ -427,7 +427,7 @@ async def async_stream_mistral_completion(**kwargs):

if chunk.data.choices[0].finish_reason == "stop":
langfuse_context.update_current_observation(
- usage={
+ usage_details={
"input": chunk.data.usage.prompt_tokens,
"output": chunk.data.usage.completion_tokens
},
2 changes: 1 addition & 1 deletion pages/docs/integrations/openai/python/assistants-api.md
@@ -141,7 +141,7 @@ def get_response(thread_id, run_id):
trace_id=langfuse_context.get_current_trace_id(),
parent_observation_id=langfuse_context.get_current_observation_id(),
model=run.model,
- usage=run.usage,
+ usage_details=run.usage,
input=input_messages,
output=assistant_response
)