Track token usage of iris requests #165

Merged: 20 commits, Oct 23, 2024
Changes from 1 commit
Commits (20)
fc44738
Add token usage monitoring in exercise chat and send to Artemis
alexjoham Oct 1, 2024
26e3873
Add Pipeline enum for better tracking
alexjoham Oct 11, 2024
aa50faf
Update tokens location, add token tracking to competency and chat pipe
alexjoham Oct 11, 2024
9905460
added first versions for tracking for smaller pipelines
alexjoham Oct 11, 2024
e241d45
Fix lint errors
alexjoham Oct 11, 2024
4502e30
Fix last lint error
alexjoham Oct 11, 2024
3b81a30
Fix lint errors
alexjoham Oct 11, 2024
74b1239
Merge remote-tracking branch 'origin/feature/track-usage-of-iris-requ…
alexjoham Oct 11, 2024
6bcb002
Merge branch 'main' into track-token-usage
alexjoham Oct 11, 2024
4324180
Add token cost tracking for input and output tokens
alexjoham Oct 12, 2024
c9e89be
Update token handling as proposed by CodeRabbit
alexjoham Oct 12, 2024
4c92900
Update PyrisMessage to use only TokenUsageDTO, add token count for error
alexjoham Oct 12, 2024
6bd4b33
Fix competency extraction did not save Enum
alexjoham Oct 12, 2024
c79837d
Merge branch 'main' into track-token-usage
alexjoham Oct 15, 2024
4d61c85
Update code after merge
alexjoham Oct 15, 2024
3253c46
Make -1 default value if no tokens have been received
alexjoham Oct 16, 2024
9fe9e0a
Update DTO for new Artemis table
alexjoham Oct 19, 2024
13c5db1
Change number of tokens if error to 0, as is standard by OpenAI & Ollama
alexjoham Oct 23, 2024
dd504fc
Fix token usage list append bug
bassner Oct 23, 2024
043264a
Fix formatting
bassner Oct 23, 2024
Fix lint errors
alexjoham committed Oct 11, 2024
commit e241d457b86e97ad4df94fa91e11db80ca28d552
app/domain/data/token_usage_dto.py (2 changes: 1 addition & 1 deletion)

@@ -7,4 +7,4 @@ class TokenUsageDTO(BaseModel):
     model_info: str
     num_input_tokens: int
     num_output_tokens: int
-    pipeline: PipelineEnum
+    pipeline: PipelineEnum
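For orientation, the DTO above together with the PipelineEnum it references can be pictured roughly as in the sketch below. Only the four fields and the two enum members that appear elsewhere in this PR's diffs are taken from the source; the str-based enum, the imports, and the module layout are assumptions.

# Hedged sketch, not the PR's exact code: the fields and the two enum members
# come from this PR's diffs; the enum base class and imports are assumptions.
from enum import Enum

from pydantic import BaseModel


class PipelineEnum(str, Enum):  # str base is assumed, not confirmed by the diff
    NOT_SET = "NOT_SET"
    IRIS_COMPETENCY_GENERATION = "IRIS_COMPETENCY_GENERATION"


class TokenUsageDTO(BaseModel):
    model_info: str
    num_input_tokens: int
    num_output_tokens: int
    pipeline: PipelineEnum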
app/llm/external/LLMTokenCount.py (8 changes: 7 additions & 1 deletion)

@@ -8,7 +8,13 @@ class LLMTokenCount:
     num_output_tokens: int
     pipeline: PipelineEnum

-    def __init__(self, model_info: str, num_input_tokens: int, num_output_tokens: int, pipeline: PipelineEnum):
+    def __init__(
+        self,
+        model_info: str,
+        num_input_tokens: int,
+        num_output_tokens: int,
+        pipeline: PipelineEnum,
+    ):
         self.model_info = model_info
         self.num_input_tokens = num_input_tokens
         self.num_output_tokens = num_output_tokens
app/llm/external/ollama.py (11 changes: 9 additions & 2 deletions)

@@ -57,7 +57,9 @@ def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]:
     return messages_to_return


-def convert_to_iris_message(message: Message, num_input_tokens: int, num_output_tokens: int, model: str) -> PyrisMessage:
+def convert_to_iris_message(
+    message: Message, num_input_tokens: int, num_output_tokens: int, model: str
+) -> PyrisMessage:
     """
     Convert a Message to a PyrisMessage
     """
@@ -111,7 +113,12 @@ def chat(
format="json" if arguments.response_format == "JSON" else "",
options=self.options,
)
return convert_to_iris_message(response["message"], response["prompt_eval_count"], response["eval_count"], response["model"])
return convert_to_iris_message(
response["message"],
response["prompt_eval_count"],
response["eval_count"],
response["model"],
)
Review comment (Contributor):

⚠️ Potential issue

Add error handling for missing keys in 'response' dictionary

When accessing response["message"], response["prompt_eval_count"], response["eval_count"], and response["model"], there's a risk of a KeyError if any of these keys are missing. It's safer to use the get method with default values or implement error handling to manage potential missing data.

Apply this diff to handle missing keys gracefully:

def chat(
    self, messages: list[PyrisMessage], arguments: CompletionArguments
) -> PyrisMessage:
    response = self._client.chat(
        model=self.model,
        messages=convert_to_ollama_messages(messages),
        format="json" if arguments.response_format == "JSON" else "",
        options=self.options,
    )
    return convert_to_iris_message(
-        response["message"],
-        response["prompt_eval_count"],
-        response["eval_count"],
-        response["model"],
+        response.get("message"),
+        response.get("prompt_eval_count", 0),
+        response.get("eval_count", 0),
+        response.get("model", self.model),
    )
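To make the reviewer's point concrete, here is a tiny illustration with a hypothetical response dictionary (not actual Ollama output): indexing a missing key raises, while .get falls back to a default.

# Hypothetical payload for illustration only; a real Ollama response may or
# may not include prompt_eval_count.
response = {"message": {"role": "assistant", "content": "Hi"}, "eval_count": 42}

# response["prompt_eval_count"]                    # would raise KeyError
num_input = response.get("prompt_eval_count", 0)   # 0
num_output = response.get("eval_count", 0)         # 42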


     def embed(self, text: str) -> list[float]:
         response = self._client.embeddings(
app/llm/external/openai_chat.py (15 changes: 10 additions & 5 deletions)

@@ -62,23 +62,26 @@ def convert_to_open_ai_messages(
     return openai_messages


-def convert_to_iris_message(message: ChatCompletionMessage, usage: Optional[CompletionUsage], model: str) -> PyrisMessage:
+def convert_to_iris_message(
+    message: ChatCompletionMessage, usage: Optional[CompletionUsage], model: str
+) -> PyrisMessage:
Review comment on lines +72 to +74 (Contributor):

💡 Codebase verification

Calls to convert_to_iris_message in app/llm/external/ollama.py do not match the updated function signature. Please update these calls to include all required parameters: message, num_input_tokens, num_output_tokens, and model.

  • app/llm/external/ollama.py:
    • Line where convert_to_iris_message is called with insufficient arguments.
🔗 Analysis chain

Ensure all calls to convert_to_iris_message are updated with new parameters

Since the function convert_to_iris_message now includes additional parameters usage: Optional[CompletionUsage] and model: str, please verify that all calls to this function across the codebase have been updated to match the new signature to prevent any potential errors.

You can run the following script to identify any calls that may need to be updated:

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Description: Find all usages of `convert_to_iris_message` and check if they match the new signature.

# Expected: All calls should include three parameters: message, usage, and model.

rg --type python $'convert_to_iris_message\s*\(' -A 1

Length of output: 85


Script:

#!/bin/bash
# Description: Find all usages of `convert_to_iris_message` in Python files and check if they match the new signature.

# Expected: All calls should include three parameters: message, usage, and model.

rg --glob "*.py" 'convert_to_iris_message\s*\(' -A 1

Length of output: 723

"""
Convert a ChatCompletionMessage to a PyrisMessage
"""
num_input_tokens = getattr(usage, 'prompt_tokens', -1)
num_output_tokens = getattr(usage, 'completion_tokens', -1)
num_input_tokens = getattr(usage, "prompt_tokens", -1)
alexjoham marked this conversation as resolved.
Show resolved Hide resolved
num_output_tokens = getattr(usage, "completion_tokens", -1)
alexjoham marked this conversation as resolved.
Show resolved Hide resolved
Review comment (Contributor):

⚠️ Potential issue

Consider using 0 as the default value for token counts

The current implementation uses -1 as the default value for num_input_tokens and num_output_tokens. However, this might be confusing as it's not a valid token count. For consistency with other parts of the codebase (e.g., ollama) and improved clarity, consider using 0 as the default value.

Suggested change:

- num_input_tokens = getattr(usage, "prompt_tokens", -1)
- num_output_tokens = getattr(usage, "completion_tokens", -1)
+ num_input_tokens = getattr(usage, "prompt_tokens", 0)
+ num_output_tokens = getattr(usage, "completion_tokens", 0)

This change would make the default values more intuitive and consistent with other parts of the codebase.

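Because usage is typed Optional[CompletionUsage], the getattr default is exactly what gets recorded whenever no usage object comes back. A minimal illustration (not from the PR):

# Minimal illustration: when usage is absent, the chosen default becomes the
# recorded token count.
usage = None
getattr(usage, "prompt_tokens", -1)  # -1, the current sentinel
getattr(usage, "prompt_tokens", 0)   # 0, the reviewer's suggestion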


     message = PyrisMessage(
         sender=map_str_to_role(message.role),
         contents=[TextMessageContentDTO(textContent=message.content)],
         send_at=datetime.now(),
         num_input_tokens=num_input_tokens,
         num_output_tokens=num_output_tokens,
-        model_info=model
+        model_info=model,
     )
     return message


 class OpenAIChatModel(ChatModel):
     model: str
     api_key: str

@@ -110,7 +113,9 @@ def chat(
                     temperature=arguments.temperature,
                     max_tokens=arguments.max_tokens,
                 )
-                return convert_to_iris_message(response.choices[0].message, response.usage, response.model)
+                return convert_to_iris_message(
+                    response.choices[0].message, response.usage, response.model
+                )
             except Exception as e:
                 wait_time = initial_delay * (backoff_factor**attempt)
                 logging.warning(f"Exception on attempt {attempt + 1}: {e}")
app/llm/langchain/iris_langchain_chat_model.py (10 changes: 6 additions & 4 deletions)

@@ -45,10 +45,12 @@ def _generate(
         iris_message = self.request_handler.chat(iris_messages, self.completion_args)
         base_message = convert_iris_message_to_langchain_message(iris_message)
         chat_generation = ChatGeneration(message=base_message)
-        self.tokens = LLMTokenCount(model_info=iris_message.model_info,
-            num_input_tokens=iris_message.num_input_tokens,
-            num_output_tokens=iris_message.num_output_tokens,
-            pipeline=PipelineEnum.NOT_SET)
+        self.tokens = LLMTokenCount(
+            model_info=iris_message.model_info,
+            num_input_tokens=iris_message.num_input_tokens,
+            num_output_tokens=iris_message.num_output_tokens,
+            pipeline=PipelineEnum.NOT_SET,
+        )
         return ChatResult(generations=[chat_generation])

     @property
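The wrapper tags its LLMTokenCount with PipelineEnum.NOT_SET and leaves relabelling to the calling pipeline. The sketch below is a hedged illustration of that hand-off, not code from this PR: the constructor and enum members come from the diffs in this section, while the example values and the collected list are assumptions.

# Hedged sketch: a pipeline relabels the NOT_SET count it received from the
# wrapper and collects it for the status callback. Example values are made up.
collected: list[LLMTokenCount] = []

token_count = LLMTokenCount(
    model_info="some-model",        # whatever the wrapper reported
    num_input_tokens=1200,
    num_output_tokens=300,
    pipeline=PipelineEnum.NOT_SET,  # as set in _generate above
)
token_count.pipeline = PipelineEnum.IRIS_COMPETENCY_GENERATION  # pipeline-specific label
collected.append(token_count)

The competency extraction pipeline further down follows the same idea, except that it builds the count directly with PipelineEnum.IRIS_COMPETENCY_GENERATION instead of relabelling a NOT_SET one.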
app/pipeline/chat/exercise_chat_pipeline.py (12 changes: 8 additions & 4 deletions)

@@ -10,7 +10,6 @@
 )
 from langchain_core.runnables import Runnable
 from langsmith import traceable, get_current_run_tree
-from sipbuild.generator.parser.tokens import tokens
 from weaviate.collections.classes.filters import Filter

 from .code_feedback_pipeline import CodeFeedbackPipeline
@@ -35,7 +34,6 @@
from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO
from ...llm import CapabilityRequestHandler, RequirementList
from ...llm import CompletionArguments
from ...llm.external.LLMTokenCount import LLMTokenCount
from ...llm.external.PipelineEnum import PipelineEnum
from ...llm.langchain import IrisLangchainChatModel
from ...retrieval.lecture_retrieval import LectureRetrieval
@@ -102,7 +100,9 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
             )
             self._run_exercise_chat_pipeline(dto, should_execute_lecture_pipeline),
             self.callback.done(
-                "Generated response", final_result=self.exercise_chat_response, tokens=self.tokens
+                "Generated response",
+                final_result=self.exercise_chat_response,
+                tokens=self.tokens,
             )

             try:
@@ -116,7 +116,11 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
                     suggestion_dto.last_message = self.exercise_chat_response
                     suggestion_dto.problem_statement = dto.exercise.problem_statement
                     suggestions = self.suggestion_pipeline(suggestion_dto)
-                    self.callback.done(final_result=None, suggestions=suggestions, tokens=[self.suggestion_pipeline.tokens])
+                    self.callback.done(
+                        final_result=None,
+                        suggestions=suggestions,
+                        tokens=[self.suggestion_pipeline.tokens],
+                    )
                 else:
                     # This should never happen but whatever
                     self.callback.skip(
app/pipeline/competency_extraction_pipeline.py (11 changes: 6 additions & 5 deletions)

@@ -5,7 +5,6 @@
 from langchain_core.prompts import (
     ChatPromptTemplate,
 )
-from sipbuild.generator.parser.tokens import tokens

 from app.domain import (
     CompetencyExtractionPipelineExecutionDTO,
@@ -80,10 +79,12 @@ def __call__(
         response = self.request_handler.chat(
             [prompt], CompletionArguments(temperature=0.4)
         )
-        num_tokens = LLMTokenCount(model_info=response.model_info,
-            num_input_tokens=response.num_input_tokens,
-            num_output_tokens=response.num_output_tokens,
-            pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION)
+        num_tokens = LLMTokenCount(
+            model_info=response.model_info,
+            num_input_tokens=response.num_input_tokens,
+            num_output_tokens=response.num_output_tokens,
+            pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION,
+        )
         self.tokens.append(num_tokens)
         response = response.contents[0].text_content