Track token usage of iris requests #165
Changes from 17 commits
New file adding a PipelineEnum (hunk `@@ -0,0 +1,16 @@`):

```python
from enum import Enum


class PipelineEnum(str, Enum):
    IRIS_CODE_FEEDBACK = "IRIS_CODE_FEEDBACK"
    IRIS_CHAT_COURSE_MESSAGE = "IRIS_CHAT_COURSE_MESSAGE"
    IRIS_CHAT_EXERCISE_MESSAGE = "IRIS_CHAT_EXERCISE_MESSAGE"
    IRIS_INTERACTION_SUGGESTION = "IRIS_INTERACTION_SUGGESTION"
    IRIS_CHAT_LECTURE_MESSAGE = "IRIS_CHAT_LECTURE_MESSAGE"
    IRIS_COMPETENCY_GENERATION = "IRIS_COMPETENCY_GENERATION"
    IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE"
    IRIS_RERANKER_PIPELINE = "IRIS_RERANKER_PIPELINE"
    IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE"
    IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE"
    IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION"
    NOT_SET = "NOT_SET"
```
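Because the enum mixes in `str`, its members compare equal to their literal string values and serialize as plain strings, which keeps JSON round-trips straightforward. A minimal sketch of that behavior (trimmed to two members for brevity, not part of the PR):

```python
# Minimal sketch: a str-mixin enum's members behave like their string values.
from enum import Enum


class PipelineEnum(str, Enum):
    # trimmed to two members for brevity
    IRIS_CODE_FEEDBACK = "IRIS_CODE_FEEDBACK"
    NOT_SET = "NOT_SET"


assert PipelineEnum.IRIS_CODE_FEEDBACK == "IRIS_CODE_FEEDBACK"  # compares as a str
assert PipelineEnum("NOT_SET") is PipelineEnum.NOT_SET          # lookup by value
```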
New file adding a TokenUsageDTO (hunk `@@ -0,0 +1,18 @@`):

```python
from pydantic import BaseModel, Field

from app.common.PipelineEnum import PipelineEnum


class TokenUsageDTO(BaseModel):
    model_info: str = Field(alias="model", default="")
    num_input_tokens: int = Field(alias="numInputTokens", default=0)
    cost_per_input_token: float = Field(alias="costPerMillionInputToken", default=0)
    num_output_tokens: int = Field(alias="numOutputTokens", default=0)
    cost_per_output_token: float = Field(alias="costPerMillionOutputToken", default=0)
    pipeline: PipelineEnum = Field(alias="pipelineId", default=PipelineEnum.NOT_SET)

    def __str__(self):
        return (
            f"{self.model_info}: {self.num_input_tokens} input cost: {self.cost_per_input_token},"
            f" {self.num_output_tokens} output cost: {self.cost_per_output_token}, pipeline: {self.pipeline} "
        )
```
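For orientation, a small usage sketch (illustrative values only, not taken from the PR): the DTO is populated through its camelCase aliases, while code reads the snake_case attributes.

```python
# Illustrative values only; the keyword names are the Field aliases declared above.
usage = TokenUsageDTO(
    model="some-model",                                # -> usage.model_info
    numInputTokens=1200,                               # -> usage.num_input_tokens
    numOutputTokens=350,                               # -> usage.num_output_tokens
    pipelineId=PipelineEnum.IRIS_CHAT_COURSE_MESSAGE,  # -> usage.pipeline
)

print(usage.num_input_tokens)  # 1200
print(usage)                   # __str__ prints model, token counts, costs, and pipeline
```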
Ollama client wrapper:

```diff
@@ -6,10 +6,11 @@
 from ollama import Client, Message

 from ...common.message_converters import map_role_to_str, map_str_to_role
+from ...common.pyris_message import PyrisMessage
+from ...common.token_usage_dto import TokenUsageDTO
 from ...domain.data.json_message_content_dto import JsonMessageContentDTO
 from ...domain.data.text_message_content_dto import TextMessageContentDTO
 from ...domain.data.image_message_content_dto import ImageMessageContentDTO
-from ...domain import PyrisMessage
 from ...llm import CompletionArguments
 from ...llm.external.model import ChatModel, CompletionModel, EmbeddingModel
```

```diff
@@ -57,15 +58,23 @@ def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]:
     return messages_to_return


-def convert_to_iris_message(message: Message) -> PyrisMessage:
+def convert_to_iris_message(
+    message: Message, num_input_tokens: int, num_output_tokens: int, model: str
+) -> PyrisMessage:
     """
     Convert a Message to a PyrisMessage
     """
     contents = [TextMessageContentDTO(text_content=message["content"])]
+    tokens = TokenUsageDTO(
+        numInputTokens=num_input_tokens,
+        numOutputTokens=num_output_tokens,
+        model=model,
+    )
```
Review comment on lines +68 to +72: Add validation for token count parameters. The token counts should be validated to ensure they are not negative values before creating the TokenUsageDTO.

```diff
+    if num_input_tokens is not None and num_input_tokens < -1:
+        raise ValueError("Input token count cannot be less than -1")
+    if num_output_tokens is not None and num_output_tokens < -1:
+        raise ValueError("Output token count cannot be less than -1")
     tokens = TokenUsageDTO(
         numInputTokens=num_input_tokens,
         numOutputTokens=num_output_tokens,
         model=model,
     )
```
```diff
     return PyrisMessage(
         sender=map_str_to_role(message["role"]),
         contents=contents,
-        send_at=datetime.now(),
+        sentAt=datetime.now(),
+        token_usage=tokens,
     )
```
```diff
@@ -108,7 +117,12 @@ def chat(
             format="json" if arguments.response_format == "JSON" else "",
             options=self.options,
         )
-        return convert_to_iris_message(response["message"])
+        return convert_to_iris_message(
+            response.get("message"),
+            response.get("prompt_eval_count", -1),
+            response.get("eval_count", -1),
+            response.get("model", self.model),
+        )

    def embed(self, text: str) -> list[float]:
        response = self._client.embeddings(
```
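For context, Ollama chat responses report the prompt and completion token counts as `prompt_eval_count` and `eval_count`, which is what the `.get(...)` calls above pick up. A minimal sketch with a hand-built response dict (no client call; the model name and values are illustrative, and it assumes `PyrisMessage` exposes the usage under `token_usage` as in the constructor call above):

```python
# Illustrative only: a dict shaped like an Ollama chat response, no API call involved.
fake_response = {
    "model": "llama3",                                      # assumed model name
    "message": {"role": "assistant", "content": "Hello!"},
    "prompt_eval_count": 42,                                # input tokens
    "eval_count": 7,                                        # output tokens
}

iris_message = convert_to_iris_message(
    fake_response.get("message"),
    fake_response.get("prompt_eval_count", -1),
    fake_response.get("eval_count", -1),
    fake_response.get("model", "llama3"),
)
print(iris_message.token_usage.num_input_tokens)   # 42
print(iris_message.token_usage.num_output_tokens)  # 7
```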
OpenAI chat wrapper:

```diff
@@ -1,7 +1,7 @@
 import logging
 import time
 from datetime import datetime
-from typing import Literal, Any
+from typing import Literal, Any, Optional

 from openai import (
     OpenAI,
```

```diff
@@ -12,12 +12,14 @@
     ContentFilterFinishReasonError,
 )
 from openai.lib.azure import AzureOpenAI
+from openai.types import CompletionUsage
 from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam
 from openai.types.shared_params import ResponseFormatJSONObject

 from ...common.message_converters import map_str_to_role, map_role_to_str
 from app.domain.data.text_message_content_dto import TextMessageContentDTO
-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
+from ...common.token_usage_dto import TokenUsageDTO
 from ...domain.data.image_message_content_dto import ImageMessageContentDTO
 from ...domain.data.json_message_content_dto import JsonMessageContentDTO
 from ...llm import CompletionArguments
```

```diff
@@ -67,15 +69,28 @@ def convert_to_open_ai_messages(
     return openai_messages


-def convert_to_iris_message(message: ChatCompletionMessage) -> PyrisMessage:
+def convert_to_iris_message(
+    message: ChatCompletionMessage, usage: Optional[CompletionUsage], model: str
+) -> PyrisMessage:
```
Review comment on lines +72 to +74 (💡 Codebase verification / 🔗 Analysis chain): Ensure all calls to `convert_to_iris_message` match the new signature. Since the function now expects three parameters (message, usage, and model), every call site needs to pass them. You can run the following script to identify any calls that may need to be updated.

🏁 Scripts executed:

```bash
#!/bin/bash
# Description: Find all usages of `convert_to_iris_message` and check if they match the new signature.
# Expected: All calls should include three parameters: message, usage, and model.
rg --type python $'convert_to_iris_message\s*\(' -A 1
```

Length of output: 85

```bash
#!/bin/bash
# Description: Find all usages of `convert_to_iris_message` in Python files and check if they match the new signature.
# Expected: All calls should include three parameters: message, usage, and model.
rg --glob "*.py" 'convert_to_iris_message\s*\(' -A 1
```

Length of output: 723
```diff
     """
     Convert a ChatCompletionMessage to a PyrisMessage
     """
-    return PyrisMessage(
+    num_input_tokens = getattr(usage, "prompt_tokens", -1)
+    num_output_tokens = getattr(usage, "completion_tokens", -1)
```
Review comment: Consider using 0 as the default value for token counts. The current implementation uses -1 as the default value for the token counts. Suggested change:

```diff
-    num_input_tokens = getattr(usage, "prompt_tokens", -1)
-    num_output_tokens = getattr(usage, "completion_tokens", -1)
+    num_input_tokens = getattr(usage, "prompt_tokens", 0)
+    num_output_tokens = getattr(usage, "completion_tokens", 0)
```

This change would make the default values more intuitive and consistent with other parts of the codebase.
```diff
+
+    tokens = TokenUsageDTO(
+        model=model,
+        numInputTokens=num_input_tokens,
+        numOutputTokens=num_output_tokens,
+    )
+
+    message = PyrisMessage(
         sender=map_str_to_role(message.role),
         contents=[TextMessageContentDTO(textContent=message.content)],
-        send_at=datetime.now(),
+        sentAt=datetime.now(),
```
Review comment (🛠️ Refactor suggestion): Use UTC timestamps to ensure consistency across time zones. In the construction of the message, apply this diff to update the timestamp:

```diff
-        sentAt=datetime.now(),
+        sentAt=datetime.utcnow(),
```
```diff
+        token_usage=tokens,
     )
+    return message


 class OpenAIChatModel(ChatModel):
```
```diff
@@ -113,13 +128,15 @@ def chat(
                max_tokens=arguments.max_tokens,
            )
            choice = response.choices[0]
+           usage = response.usage
+           model = response.model
            if choice.finish_reason == "content_filter":
                # I figured that an openai error would be automatically raised if the content filter activated,
                # but it seems that that is not the case.
                # We don't want to retry because the same message will likely be rejected again.
                # Raise an exception to trigger the global error handler and report a fatal error to the client.
                raise ContentFilterFinishReasonError()
-           return convert_to_iris_message(choice.message)
+           return convert_to_iris_message(choice.message, usage, model)
        except (
            APIError,
            APITimeoutError,
```
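Similarly for the OpenAI path, here is a minimal sketch of how `response.usage` feeds the DTO. The objects are constructed by hand (no API call), the model string is just an example, and it again assumes `PyrisMessage` exposes the usage as `token_usage`:

```python
# Illustrative only: build the OpenAI response pieces by hand instead of calling the API.
from openai.types import CompletionUsage
from openai.types.chat import ChatCompletionMessage

msg = ChatCompletionMessage(role="assistant", content="Hi there!")
usage = CompletionUsage(prompt_tokens=25, completion_tokens=8, total_tokens=33)

iris_message = convert_to_iris_message(msg, usage, "example-model")
print(iris_message.token_usage)  # e.g. "example-model: 25 input cost: 0, 8 output cost: 0, ..."
```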
LLM wrapper base interface:

```diff
@@ -1,7 +1,7 @@
 from abc import ABCMeta, abstractmethod
 from typing import Optional

-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
```
Review comment (💡 Codebase verification): Inconsistent import paths for `PyrisMessage`. Please update these import statements to match the standardized patterns. This will ensure consistency across the codebase and prevent potential import issues.

🔗 Analysis chain: Import path update looks good. The import statement for `PyrisMessage` has been updated. To ensure this change is consistent across the codebase, let's verify the usage of `PyrisMessage` imports.

🏁 Scripts executed:

```bash
#!/bin/bash
# Description: Verify the import path for PyrisMessage across the codebase
# Test: Search for PyrisMessage imports
rg --type python "from.*import.*PyrisMessage"
# Test: Search for PyrisMessage usage
rg --type python "PyrisMessage"
```

Length of output: 141

```bash
#!/bin/bash
# Description: Verify the import path for PyrisMessage across the codebase
# Test: Search for PyrisMessage imports
rg --glob "*.py" "from.*import.*PyrisMessage"
# Test: Search for PyrisMessage usage
rg --glob "*.py" "PyrisMessage"
```

Length of output: 8395

```bash
#!/bin/bash
# Description: Identify inconsistent import paths for PyrisMessage
# Define acceptable import patterns
acceptable_patterns="from ...common.pyris_message import PyrisMessage|from app.common.pyris_message import PyrisMessage"
# Search for PyrisMessage imports that do NOT match the acceptable patterns
rg --glob "*.py" "from\s+(?!\.\.\.common\.pyris_message|app\.common\.pyris_message)\w+.*import\s+PyrisMessage"
```

Length of output: 537

```bash
#!/bin/bash
# Description: Identify inconsistent import paths for PyrisMessage without using look-around
# Step 1: Find all PyrisMessage import statements
rg --glob "*.py" "from.*import.*PyrisMessage" > all_pyris_imports.txt
# Step 2: Exclude acceptable import patterns
rg -v -e "from \.\.\.common\.pyris_message import PyrisMessage" -e "from app\.common\.pyris_message import PyrisMessage" all_pyris_imports.txt
```

Length of output: 383
```diff
 from ...domain.data.image_message_content_dto import ImageMessageContentDTO
 from ...llm import CompletionArguments
```
Review comment (💡 Codebase verification): Issues found: inconsistent import paths for `PyrisMessage`. The import statements for `PyrisMessage` are not fully consistent across the codebase:

- `from ..common.pyris_message import PyrisMessage`
- `from app.common.pyris_message import PyrisMessage`

Please standardize all import paths to `from ...common.pyris_message import PyrisMessage` to ensure consistency and prevent potential issues.

🔗 Analysis chain: LGTM! Verify import consistency across the codebase. The import statement for `PyrisMessage` has been updated to reflect the new module structure. This change is in line with the PR objectives. To ensure consistency across the codebase, please run the following script; it will help identify any inconsistencies in the import statements across the project.

🏁 Scripts executed: The following scripts were executed for the analysis (lengths of output: 615 and 2314).