From 7e52afb1f52873dc2344faa6d6ddffb9e20426cb Mon Sep 17 00:00:00 2001
From: "Felix T.J. Dietrich"
Date: Mon, 21 Oct 2024 18:20:02 +0200
Subject: [PATCH] fix token usage response

---
Reviewer note (ignored by git am): in UsageHandler.on_llm_end the input-token
share of totalUsage["cost"] is priced with
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN. The submitted revision applied
the output-token rate to both terms, which overstated the cost whenever the two
rates differ. Only that one added line differs from the submitted revision, so
hunk sizes and the diffstat below are unchanged.

 llm_core/.env.example                         |  4 ++-
 llm_core/llm_core/models/callbacks.py         | 32 ++++++++++++-------
 .../modeling/module_modeling_llm/.env.example |  2 ++
 .../module_programming_llm/.env.example       |  4 ++-
 modules/text/module_text_llm/.env.example     |  4 ++-
 5 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/llm_core/.env.example b/llm_core/.env.example
index 34e279aab..823e9a302 100644
--- a/llm_core/.env.example
+++ b/llm_core/.env.example
@@ -12,7 +12,9 @@ DATABASE_URL=sqlite:///../data/data.sqlite
 
 # Default model to use
 # See below for options, available models are also logged on startup
-LLM_DEFAULT_MODEL="azure_openai_gpt-35-turbo"
+LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
+LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
+LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15
 
 # Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
 LLM_ENABLE_LLM_AS_A_JUDGE=1
diff --git a/llm_core/llm_core/models/callbacks.py b/llm_core/llm_core/models/callbacks.py
index d33b0a6bf..dc8d7b329 100644
--- a/llm_core/llm_core/models/callbacks.py
+++ b/llm_core/llm_core/models/callbacks.py
@@ -1,3 +1,5 @@
+import os
+
 from langchain.callbacks.base import BaseCallbackHandler
 from langchain_core.outputs import LLMResult
 from langchain_core.messages.ai import UsageMetadata
@@ -7,10 +9,13 @@
 
 class UsageHandler(BaseCallbackHandler):
     def on_llm_end(self, response: LLMResult, **kwargs) -> None:
+        cost_per_million_input_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN", 0.0))
+        cost_per_million_output_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN", 0.0))
+
         meta = get_meta()
 
-        total_usage = meta.get("total_usage", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
-        llm_calls = meta.get("llm_calls", [])
+        total_usage = meta.get("totalUsage", {"numInputTokens": 0, "numOutputTokens": 0, "numTotalTokens": 0, "cost": 0 })
+        llm_calls = meta.get("llmRequests", [])
 
         for generations in response.generations:
             for generation in generations:
@@ -18,16 +23,21 @@ def on_llm_end(self, response: LLMResult, **kwargs) -> None:
                 generation_usage: UsageMetadata = message["usage_metadata"]
                 model_name = message["response_metadata"].get("model_name", None)
 
-                total_usage["input_tokens"] += generation_usage["input_tokens"]
-                total_usage["output_tokens"] += generation_usage["output_tokens"]
-                total_usage["total_tokens"] += generation_usage["total_tokens"]
+                total_usage["numInputTokens"] += generation_usage["input_tokens"]
+                total_usage["numOutputTokens"] += generation_usage["output_tokens"]
+                total_usage["numTotalTokens"] += generation_usage["total_tokens"]
+
+                total_usage["cost"] += int(generation_usage["input_tokens"]) * cost_per_million_input_tokens / 1_000_000
+                total_usage["cost"] += int(generation_usage["output_tokens"]) * cost_per_million_output_tokens / 1_000_000
 
                 llm_calls.append({
-                    "model_name": model_name,
-                    "input_tokens": generation_usage["input_tokens"],
-                    "output_tokens": generation_usage["output_tokens"],
-                    "total_tokens": generation_usage["total_tokens"],
+                    "model": model_name,
+                    "costPerMillionInputToken": cost_per_million_input_tokens,
+                    "costPerMillionOutputToken": cost_per_million_output_tokens,
+                    "numInputTokens": generation_usage["input_tokens"],
+                    "numOutputTokens": generation_usage["output_tokens"],
+                    "numTotalTokens": generation_usage["total_tokens"],
                 })
 
-        emit_meta("total_usage", total_usage)
-        emit_meta("llm_calls", llm_calls)
+        emit_meta("totalUsage", total_usage)
+        emit_meta("llmRequests", llm_calls)
diff --git a/modules/modeling/module_modeling_llm/.env.example b/modules/modeling/module_modeling_llm/.env.example
index 68b0577a1..ebeeb7bc5 100644
--- a/modules/modeling/module_modeling_llm/.env.example
+++ b/modules/modeling/module_modeling_llm/.env.example
@@ -13,6 +13,8 @@ DATABASE_URL=sqlite:///../data/data.sqlite
 # Default model to use
 # See below for options, available models are also logged on startup
 LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
+LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
+LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15
 
 # Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
 LLM_ENABLE_LLM_AS_A_JUDGE=1
diff --git a/modules/programming/module_programming_llm/.env.example b/modules/programming/module_programming_llm/.env.example
index 2b6d9a889..753874245 100644
--- a/modules/programming/module_programming_llm/.env.example
+++ b/modules/programming/module_programming_llm/.env.example
@@ -12,7 +12,9 @@ DATABASE_URL=sqlite:///../data/data.sqlite
 
 # Default model to use
 # See below for options, available models are also logged on startup
-LLM_DEFAULT_MODEL="azure_openai_gpt-35"
+LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
+LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
+LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15
 
 # Standard OpenAI (Non-Azure) [leave blank if not used]
 # Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
diff --git a/modules/text/module_text_llm/.env.example b/modules/text/module_text_llm/.env.example
index 34e279aab..823e9a302 100644
--- a/modules/text/module_text_llm/.env.example
+++ b/modules/text/module_text_llm/.env.example
@@ -12,7 +12,9 @@ DATABASE_URL=sqlite:///../data/data.sqlite
 
 # Default model to use
 # See below for options, available models are also logged on startup
-LLM_DEFAULT_MODEL="azure_openai_gpt-35-turbo"
+LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
+LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
+LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15
 
 # Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
 LLM_ENABLE_LLM_AS_A_JUDGE=1