Skip to content

Commit

Permalink
Merge pull request #4 from drudilorenzo/feature-json-logging
Browse files Browse the repository at this point in the history
Feat: json logging
  • Loading branch information
drudilorenzo authored Apr 14, 2024
2 parents 8111a05 + 21eb4b5 commit 9563b67
Showing 7 changed files with 156 additions and 63 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -19,4 +19,7 @@ cost-logs/
*.egg-info/

build/
dist/
dist/

# demo internal
demo_internal.ipynb
22 changes: 19 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
@@ -26,10 +26,26 @@ How to install:

Key Features:
-------------
* Track the cost of every request you make to OpenAI and save them in a csv file.
* Visualize the cost of all the requests you have made.
* Track the cost of every request you make and save them in a JSON file.
* Choose the feature you want to track (prompt_tokens, completion_tokens, completion, prompt, etc.).
* Check the cost of your requests filtering by model or strftime aggregation (see the docs).

Endpoint supported:
-------------------
* Chat completion.
* Every endpoint which response contains the field "*usage.prompt_tokens*" and "*usage.completion_tokens*".
* Every response passed to *OpenAICostLogger* should contain the fields "*usage.prompt_tokens*" and "*usage.completion_tokens*".
This is the only strict requirement of the library; how you call the OpenAI API is entirely up to you. If needed, you can
find a simple example in the demo file.

Viz examples:
-------------
.. image:: images/viz_prints.png
   :alt: Viz prints examples.
   :align: center
   :width: 500px

.. image:: images/strftime_agg.png
   :alt: Strftime aggregation example.
   :align: center
   :width: 500px

29 changes: 11 additions & 18 deletions changes_proposal.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
1. ✅ cost tracker handles completion creation - PR ready
1. ⌛ model has to be provided in form of enum - important, hard to juggle with all 0xxx versions

Change:
- we can just infer it from `response.model`
- removes possible problems with choosing the right enum or forgetting to change it while changing the model for experiment

2. ⌛ allow for experiment/subexperiment stats

3. ✅ cost tracker handles completion creation - Merged

Change: separating completion and cost tracking, by changing the main functionality from `chat_completion` to `update_cost`

@@ -7,16 +15,7 @@ Motivation:
- allows easier integration, user only has to initialize tracker object and call `update_cost(response)`,
otherwise each chat completion call would have to be rewritten

2. ⌛ costs are calculated across all log files

Change:
- static `total_cost` that will calculate total spending from logs
- static `experiment_cost(experiment_name=self.experiment_name)` gets you total cost of specific experiment
- defaulting to current experiment_name in tracker object
- if object not initialized, experiment_name has to be provided
- `cost` that gets you costs for current run of this tracker object

3. ⌛ log file just acumulates total cost
4. ✅ log file just accumulates total cost

Change:
- add breakdown of responses/input token per response/output token per response/cost per response
@@ -40,13 +39,7 @@ Change:
}
```
4. ⌛ model has to be provided in form of enum
Change:
- we can just infer it from `response.model`
- removes possible problems with choosing the right enum or forgetting to change it while changing the model for experiment
5. ✅ datetime strftime format - PR ready
5. ✅ datetime strftime format - Merged
Change:
- change strftime format to `strftime("%Y-%m-%d_%H:%M:%S")`, makes it more readable
Binary file added images/strftime_agg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/viz_prints.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
82 changes: 71 additions & 11 deletions openai_cost_logger/openai_cost_logger.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import json
from typing import Dict
from pathlib import Path
from time import strftime
@@ -16,7 +17,7 @@
"cost"
]

"""OpenAI cost logger"""
"""OpenAI cost logger."""
class OpenAICostLogger:
def __init__(
self,
@@ -26,6 +27,7 @@ def __init__(
experiment_name: str,
cost_upperbound: float = float('inf'),
log_folder: str = DEFAULT_LOG_PATH,
log_level: str = "detail"
):
"""Initialize the cost logger.
@@ -40,13 +42,21 @@ def __init__(
client_args (Dict, optional): The parameters to pass to the client. Defaults to {}.
"""
self.cost = 0
self.n_responses = 0
self.model = model
self.input_cost = input_cost
self.log_folder = log_folder
self.output_cost = output_cost
self.experiment_name = experiment_name
self.cost_upperbound = cost_upperbound
self.filename = f"{experiment_name}_cost_" + strftime("%Y-%m-%d_%H:%M:%S") + ".csv"
self.log_level = log_level
self.creation_datetime = strftime("%Y-%m-%d_%H:%M:%S")
self.filename = f"{experiment_name}_{self.creation_datetime}.json"
self.filepath = Path(self.log_folder, self.filename)

self.__check_existance_log_folder()
self.__build_log_file()


def update_cost(self, response: ChatCompletion) -> None:
"""Extract the number of input and output tokens from a chat completion response
@@ -56,15 +66,10 @@ def update_cost(self, response: ChatCompletion) -> None:
response: ChatCompletion object from the model.
"""
self.cost += self.__get_answer_cost(response)
self.n_responses += 1
self.__write_cost_to_json(response)
self.__validate_cost()
path = Path(self.log_folder, self.filename)
path.parent.mkdir(parents=True, exist_ok=True)

# Be careful, it overwrites the file if it already exists
with open(path, mode='w') as file:
csvwriter = csv.writer(file)
csvwriter.writerow(FILE_HEADER)
csvwriter.writerow([self.experiment_name, self.model, self.cost])


def get_current_cost(self) -> float:
"""Get the current cost of the cost tracker.
@@ -74,6 +79,7 @@ def get_current_cost(self) -> float:
"""
return self.cost


def __get_answer_cost(self, answer: Dict) -> float:
"""Calculate the cost of the answer based on the input and output tokens.
@@ -85,11 +91,65 @@ def __get_answer_cost(self, answer: Dict) -> float:
return (self.input_cost * answer.usage.prompt_tokens) / COST_UNIT + \
(self.output_cost * answer.usage.completion_tokens) / COST_UNIT


def __validate_cost(self):
"""Check if the cost exceeds the upperbound and raise an exception if it does.
Raises:
Exception: If the cost exceeds the upperbound.
"""
if self.cost > self.cost_upperbound:
raise Exception(f"Cost exceeded upperbound: {self.cost} > {self.cost_upperbound}")
raise Exception(f"Cost exceeded upperbound: {self.cost} > {self.cost_upperbound}")


def __write_cost_to_json(self, response: ChatCompletion) -> None:
    """Record the latest response in the JSON log file.

    The existing log is loaded from ``self.filepath``, its running totals
    are refreshed from ``self.cost`` and ``self.n_responses``, one breakdown
    entry describing ``response`` is appended, and the file is rewritten.

    Args:
        response (ChatCompletion): The response from the model.
    """
    with open(self.filepath, 'r') as log_in:
        log = json.load(log_in)

    # Totals are overwritten (not incremented): the instance counters are
    # the source of truth and have already been updated by the caller.
    log["total_cost"] = self.cost
    log["total_responses"] = self.n_responses

    entry = self.__build_log_breadown_entry(response)
    log["breakdown"].append(entry)

    with open(self.filepath, 'w') as log_out:
        json.dump(log, log_out, indent=4)


def __check_existance_log_folder(self) -> None:
    """Make sure the folder that will contain the log file exists.

    Missing parent directories are created too; an already existing
    folder is left as it is.
    """
    log_dir = self.filepath.parent
    log_dir.mkdir(parents=True, exist_ok=True)


def __build_log_file(self) -> None:
    """Write the initial JSON log file.

    The file at ``self.filepath`` is created (or overwritten) with the
    experiment name, the creation timestamp, the model, the current total
    cost, a response counter set to zero and an empty breakdown list.
    """
    header = dict(
        experiment_name=self.experiment_name,
        creation_datetime=strftime("%Y-%m-%d %H:%M:%S"),
        model=self.model,
        total_cost=self.cost,
        total_responses=0,
        breakdown=[],
    )
    with open(self.filepath, 'w') as log_out:
        json.dump(header, log_out, indent=4)


def __build_log_breadown_entry(self, response: ChatCompletion) -> Dict:
    """Describe a single response as a breakdown entry for the JSON log.

    Args:
        response (ChatCompletion): The response from the model.

    Returns:
        Dict: The entry, holding the cost of the response, its input and
            output token counts, the content of the first choice, the model
            reported by the response and the current local timestamp.
    """
    cost = self.__get_answer_cost(response)
    usage = response.usage
    entry = {
        "cost": cost,
        "input_tokens": usage.prompt_tokens,
        "output_tokens": usage.completion_tokens,
    }
    # Only the first choice is logged.
    entry["content"] = response.choices[0].message.content
    entry["inferred_model"] = response.model
    entry["datetime"] = strftime("%Y-%m-%d %H:%M:%S")
    return entry
81 changes: 51 additions & 30 deletions openai_cost_logger/openai_cost_logger_viz.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os
import csv
import json
from datetime import datetime
from typing import Dict
from pathlib import Path
import matplotlib.pyplot as plt
from collections import defaultdict

from openai_cost_logger.constants import DEFAULT_LOG_PATH

"""Cost logger visualizer."""
class OpenAICostLoggerViz:

@staticmethod
@@ -21,13 +23,13 @@ def get_total_cost(path: str = DEFAULT_LOG_PATH) -> float:
"""
cost = 0
for filename in os.listdir(path):
with open(Path(path, filename), mode='r') as file:
csvreader = csv.reader(file)
next(csvreader)
for row in csvreader:
cost += float(row[2])
if filename.endswith(".json"):
with open(Path(path, filename), mode='r') as file:
data = json.load(file)
cost += data["total_cost"]
return cost


@staticmethod
def print_total_cost(path: str = DEFAULT_LOG_PATH) -> None:
"""Print the total cost of all the logs in the directory.
@@ -36,9 +38,9 @@ def print_total_cost(path: str = DEFAULT_LOG_PATH) -> None:
log_folder (str, optional): Cost logs directory. Defaults to DEFAULT_LOG_PATH.
This method reads all the files in the specified directory.
"""

print(f"Total cost: {round(OpenAICostLoggerViz.get_total_cost(path), 6)} (USD)")



@staticmethod
def get_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> Dict[str, float]:
"""Return the total cost by model of all the logs in the directory.
@@ -52,15 +54,15 @@ def get_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> Dict[str, float]:
"""
cost_by_model = defaultdict(float)
for filename in os.listdir(path):
with open(Path(path, filename), mode='r') as file:
csvreader = csv.reader(file)
next(csvreader)
for row in csvreader:
if row[1] not in cost_by_model:
cost_by_model[row[1]] = 0
cost_by_model[row[1]] += float(row[2])
if filename.endswith(".json"):
with open(Path(path, filename), mode='r') as file:
data = json.load(file)
if data["model"] not in cost_by_model:
cost_by_model[data["model"]] = 0
cost_by_model[data["model"]] += data["total_cost"]
return cost_by_model



def print_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> None:
"""Print the total cost by model of all the logs in the directory.
@@ -71,31 +73,50 @@ def print_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> None:
cost_by_model = OpenAICostLoggerViz.get_total_cost_by_model(path)
for model, cost in cost_by_model.items():
print(f"{model}: {round(cost, 6)} (USD)")



@staticmethod
def plot_cost_by_day(path: str = DEFAULT_LOG_PATH, last_n_days: int = None) -> None:
"""Plot the cost by day of all the logs in the directory.
def plot_cost_by_strftime(path: str = DEFAULT_LOG_PATH, strftime_aggregator: str = "%Y-%m-%d", last_n_days: int = None) -> None:
"""Plot the cost by day of all the logs in the directory aggregated using strftime_aggregator.
Args:
path (str, optional): Cost logs directory. Defaults to DEFAULT_LOG_PATH.
This method reads all the files in the specified directory.
last_n_days (int, optional): The number of last days to plot. Defaults to None.
"""
cost_by_day = defaultdict(float)
cost_by_aggregation_key = defaultdict(float)
for filename in os.listdir(path):
with open(Path(path, filename), mode='r') as file:
csvreader = csv.reader(file)
next(csvreader)
for row in csvreader:
day = filename.split("_")[2]
cost_by_day[day] += float(row[2])
if filename.endswith(".json"):
with open(Path(path, filename), mode='r') as file:
data = json.load(file)
creation_datetime = datetime.strptime(data["creation_datetime"], "%Y-%m-%d %H:%M:%S")
aggregation_key = creation_datetime.strftime(strftime_aggregator)
cost_by_aggregation_key[aggregation_key] += data["total_cost"]

cost_by_day = dict(sorted(cost_by_day.items(), key=lambda x: x[0]))
cost_by_aggregation_key = dict(sorted(cost_by_aggregation_key.items(), key=lambda x: x[0]))
if last_n_days:
cost_by_day = dict(list(cost_by_day.items())[-last_n_days:])
cost_by_aggregation_key = dict(list(cost_by_aggregation_key.items())[-last_n_days:])

plt.bar(cost_by_day.keys(), cost_by_day.values(), width=0.5)
plt.bar(cost_by_aggregation_key.keys(), cost_by_aggregation_key.values(), width=0.5)
plt.xticks(rotation=30, fontsize=8)
plt.xlabel('Day')
plt.ylabel('Cost [$]')
plt.title('Cost by day')
plt.show()
plt.tight_layout()
plt.show()


@staticmethod
def plot_cost_by_day(path: str = DEFAULT_LOG_PATH, last_n_days: int = None) -> None:
    """Plot the cost of all the logs in the directory, grouped by day.

    Thin wrapper that delegates to ``plot_cost_by_strftime`` with the
    ``"%Y-%m-%d"`` aggregator.

    Args:
        path (str, optional): Cost logs directory. Defaults to DEFAULT_LOG_PATH.
            All the files in the specified directory are considered.
        last_n_days (int, optional): The number of last days to plot. Defaults to None.
    """
    daily_aggregator = "%Y-%m-%d"
    plot = OpenAICostLoggerViz.plot_cost_by_strftime
    plot(path=path, strftime_aggregator=daily_aggregator, last_n_days=last_n_days)

0 comments on commit 9563b67

Please sign in to comment.