diff --git a/.gitignore b/.gitignore index e8f9ccc..793ec1f 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,7 @@ cost-logs/ *.egg-info/ build/ -dist/ \ No newline at end of file +dist/ + +# demo internal +demo_internal.ipynb diff --git a/README.rst b/README.rst index 8d65c80..8f4e762 100644 --- a/README.rst +++ b/README.rst @@ -26,10 +26,26 @@ How to install: Key Features: ------------- -* Track the cost of every request you make to OpenAI and save them in a csv file. -* Visualize the cost of all the requests you have made. +* Track the cost of every request you make and save them in a JSON file. +* Choose the feature you want to track (prompt_tokens, completion_tokens, completion, prompt, etc.). +* Check the cost of your requests filtering by model or strftime aggregation (see the docs). Endpoint supported: ------------------- * Chat completion. -* Every endpoint which response contains the field "*usage.prompt_tokens*" and "*usage.completion_tokens*". +* Every response passed to *OpenAICostLogger* should contain the fields "*usage.prompt_tokens*" and "*usage.completion_tokens*". + This is the only strict requirement of the library; the way you call the OpenAI API is totally up to you. If needed, you can + find an easy example in the demo file. + +Viz examples: +------------- +.. image:: images/viz_prints.png + :alt: Viz prints examples. + :align: center + :width: 500px + +.. image:: images/strftime_agg.png + :alt: Strftime aggregation example. + :align: center + :width: 500px + diff --git a/changes_proposal.md b/changes_proposal.md index 60dd0bc..dd34342 100644 --- a/changes_proposal.md +++ b/changes_proposal.md @@ -1,4 +1,12 @@ -1. ✅ cost tracker handles completion creation - PR ready +1. 
⌛ model has to be provided in form of enum - important, hard to juggle with all 0xxx versions + +Change: + - we can just infer it from `response.model` + - removes possible problems with choosing the right enum or forgetting to change it while changing the model for experiment + +2. ⌛ allow for experiment/subexperiment stats + +3. ✅ cost tracker handles completion creation - Merged Change: separating completion and cost tracking, by changing the main functionality from `chat_completion` to `update_cost` @@ -7,16 +15,7 @@ Motivation: - allows easier integration, user only has to initialize tracker object and call `update_cost(response)`, otherwise each chat completion call would have to be rewritten -2. ⌛ costs are calculated across all log files - -Change: - - static `total_cost` that will calculate total spending from logs - - static `experiment_cost(experiment_name=self.experiment_name)` gets you total cost of specific experiment - - defaulting to current experiment_name in tracker object - - if object not initialized, experiment_name has to be provided - - `cost` that gets you costs for current run of this tracker object - -3. ⌛ log file just acumulates total cost +4. ✅ log file just accumulates total cost Change: - add breakdown of responses/input token per response/output token per response/cost per response @@ -40,13 +39,7 @@ Change: } ``` -4. ⌛ model has to be provided in form of enum - -Change: - - we can just infer it from `response.model` - - removes possible problems with choosing the right enum or forgetting to change it while changing the model for experiment - -5. 
✅ datetime strftime format - Merged Change: - change strftime format to `strftime("%Y-%m-%d_%H:%M:%S")`, makes it more readable diff --git a/images/strftime_agg.png b/images/strftime_agg.png new file mode 100644 index 0000000..ef23868 Binary files /dev/null and b/images/strftime_agg.png differ diff --git a/images/viz_prints.png b/images/viz_prints.png new file mode 100644 index 0000000..76153a7 Binary files /dev/null and b/images/viz_prints.png differ diff --git a/openai_cost_logger/openai_cost_logger.py b/openai_cost_logger/openai_cost_logger.py index ed12181..5bd64f0 100644 --- a/openai_cost_logger/openai_cost_logger.py +++ b/openai_cost_logger/openai_cost_logger.py @@ -1,4 +1,5 @@ import csv +import json from typing import Dict from pathlib import Path from time import strftime @@ -16,7 +17,7 @@ "cost" ] -"""OpenAI cost logger""" +"""OpenAI cost logger.""" class OpenAICostLogger: def __init__( self, @@ -26,6 +27,7 @@ def __init__( experiment_name: str, cost_upperbound: float = float('inf'), log_folder: str = DEFAULT_LOG_PATH, + log_level: str = "detail" ): """Initialize the cost logger. @@ -40,13 +42,21 @@ def __init__( client_args (Dict, optional): The parameters to pass to the client. Defaults to {}. 
""" self.cost = 0 + self.n_responses = 0 self.model = model self.input_cost = input_cost self.log_folder = log_folder self.output_cost = output_cost self.experiment_name = experiment_name self.cost_upperbound = cost_upperbound - self.filename = f"{experiment_name}_cost_" + strftime("%Y-%m-%d_%H:%M:%S") + ".csv" + self.log_level = log_level + self.creation_datetime = strftime("%Y-%m-%d_%H:%M:%S") + self.filename = f"{experiment_name}_{self.creation_datetime}.json" + self.filepath = Path(self.log_folder, self.filename) + + self.__check_existance_log_folder() + self.__build_log_file() + def update_cost(self, response: ChatCompletion) -> None: """Extract the number of input and output tokens from a chat completion response @@ -56,15 +66,10 @@ def update_cost(self, response: ChatCompletion) -> None: response: ChatCompletion object from the model. """ self.cost += self.__get_answer_cost(response) + self.n_responses += 1 + self.__write_cost_to_json(response) self.__validate_cost() - path = Path(self.log_folder, self.filename) - path.parent.mkdir(parents=True, exist_ok=True) - - # Be careful, it overwrites the file if it already exists - with open(path, mode='w') as file: - csvwriter = csv.writer(file) - csvwriter.writerow(FILE_HEADER) - csvwriter.writerow([self.experiment_name, self.model, self.cost]) + def get_current_cost(self) -> float: """Get the current cost of the cost tracker. @@ -74,6 +79,7 @@ def get_current_cost(self) -> float: """ return self.cost + def __get_answer_cost(self, answer: Dict) -> float: """Calculate the cost of the answer based on the input and output tokens. @@ -85,6 +91,7 @@ def __get_answer_cost(self, answer: Dict) -> float: return (self.input_cost * answer.usage.prompt_tokens) / COST_UNIT + \ (self.output_cost * answer.usage.completion_tokens) / COST_UNIT + def __validate_cost(self): """Check if the cost exceeds the upperbound and raise an exception if it does. 
@@ -92,4 +99,57 @@ def __validate_cost(self): Exception: If the cost exceeds the upperbound. """ if self.cost > self.cost_upperbound: - raise Exception(f"Cost exceeded upperbound: {self.cost} > {self.cost_upperbound}") \ No newline at end of file + raise Exception(f"Cost exceeded upperbound: {self.cost} > {self.cost_upperbound}") + + + def __write_cost_to_json(self, response: ChatCompletion) -> None: + """Write the cost to a json file. + + Args: + response (ChatCompletion): The response from the model. + """ + with open(self.filepath, 'r') as file: + data = json.load(file) + data["total_cost"] = self.cost + data["total_responses"] = self.n_responses + data["breakdown"].append(self.__build_log_breadown_entry(response)) + with open(self.filepath, 'w') as file: + json.dump(data, file, indent=4) + + + def __check_existance_log_folder(self) -> None: + """Check if the log folder exists and create it if it does not.""" + self.filepath.parent.mkdir(parents=True, exist_ok=True) + + + def __build_log_file(self) -> None: + """Create the log file with the header.""" + log_file_template = { + "experiment_name": self.experiment_name, + "creation_datetime": strftime("%Y-%m-%d %H:%M:%S"), + "model": self.model, + "total_cost": self.cost, + "total_responses": 0, + "breakdown": [] + } + with open(self.filepath, 'w') as file: + json.dump(log_file_template, file, indent=4) + + + def __build_log_breadown_entry(self, response: ChatCompletion) -> Dict: + """Build a json log entry for the breakdown of the cost. + + Args: + response (ChatCompletion): The response from the model. + + Returns: + Dict: The json log entry. 
+ """ + return { + "cost": self.__get_answer_cost(response), + "input_tokens": response.usage.prompt_tokens, + "output_tokens": response.usage.completion_tokens, + "content": response.choices[0].message.content, + "inferred_model": response.model, + "datetime": strftime("%Y-%m-%d %H:%M:%S"), + } \ No newline at end of file diff --git a/openai_cost_logger/openai_cost_logger_viz.py b/openai_cost_logger/openai_cost_logger_viz.py index 80ddf3c..bfc3e24 100644 --- a/openai_cost_logger/openai_cost_logger_viz.py +++ b/openai_cost_logger/openai_cost_logger_viz.py @@ -1,5 +1,6 @@ import os -import csv +import json +from datetime import datetime from typing import Dict from pathlib import Path import matplotlib.pyplot as plt @@ -7,6 +8,7 @@ from openai_cost_logger.constants import DEFAULT_LOG_PATH +"""Cost logger visualizer.""" class OpenAICostLoggerViz: @staticmethod @@ -21,13 +23,13 @@ def get_total_cost(path: str = DEFAULT_LOG_PATH) -> float: """ cost = 0 for filename in os.listdir(path): - with open(Path(path, filename), mode='r') as file: - csvreader = csv.reader(file) - next(csvreader) - for row in csvreader: - cost += float(row[2]) + if filename.endswith(".json"): + with open(Path(path, filename), mode='r') as file: + data = json.load(file) + cost += data["total_cost"] return cost + @staticmethod def print_total_cost(path: str = DEFAULT_LOG_PATH) -> None: """Print the total cost of all the logs in the directory. @@ -36,9 +38,9 @@ def print_total_cost(path: str = DEFAULT_LOG_PATH) -> None: log_folder (str, optional): Cost logs directory. Defaults to DEFAULT_LOG_PATH. This method reads all the files in the specified directory. """ - print(f"Total cost: {round(OpenAICostLoggerViz.get_total_cost(path), 6)} (USD)") - + + @staticmethod def get_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> Dict[str, float]: """Return the total cost by model of all the logs in the directory. 
@@ -52,15 +54,15 @@ def get_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> Dict[str, float]: """ cost_by_model = defaultdict(float) for filename in os.listdir(path): - with open(Path(path, filename), mode='r') as file: - csvreader = csv.reader(file) - next(csvreader) - for row in csvreader: - if row[1] not in cost_by_model: - cost_by_model[row[1]] = 0 - cost_by_model[row[1]] += float(row[2]) + if filename.endswith(".json"): + with open(Path(path, filename), mode='r') as file: + data = json.load(file) + if data["model"] not in cost_by_model: + cost_by_model[data["model"]] = 0 + cost_by_model[data["model"]] += data["total_cost"] return cost_by_model - + + def print_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> None: """Print the total cost by model of all the logs in the directory. @@ -71,31 +73,50 @@ def print_total_cost_by_model(path: str = DEFAULT_LOG_PATH) -> None: cost_by_model = OpenAICostLoggerViz.get_total_cost_by_model(path) for model, cost in cost_by_model.items(): print(f"{model}: {round(cost, 6)} (USD)") - + + @staticmethod - def plot_cost_by_day(path: str = DEFAULT_LOG_PATH, last_n_days: int = None) -> None: - """Plot the cost by day of all the logs in the directory. + def plot_cost_by_strftime(path: str = DEFAULT_LOG_PATH, strftime_aggregator: str = "%Y-%m-%d", last_n_days: int = None) -> None: + """Plot the cost by day of all the logs in the directory aggregated using strftime_aggregator. Args: path (str, optional): Cost logs directory. Defaults to DEFAULT_LOG_PATH. This method reads all the files in the specified directory. last_n_days (int, optional): The number of last days to plot. Defaults to None. 
""" - cost_by_day = defaultdict(float) + cost_by_aggregation_key = defaultdict(float) for filename in os.listdir(path): - with open(Path(path, filename), mode='r') as file: - csvreader = csv.reader(file) - next(csvreader) - for row in csvreader: - day = filename.split("_")[2] - cost_by_day[day] += float(row[2]) + if filename.endswith(".json"): + with open(Path(path, filename), mode='r') as file: + data = json.load(file) + creation_datetime = datetime.strptime(data["creation_datetime"], "%Y-%m-%d %H:%M:%S") + aggregation_key = creation_datetime.strftime(strftime_aggregator) + cost_by_aggregation_key[aggregation_key] += data["total_cost"] - cost_by_day = dict(sorted(cost_by_day.items(), key=lambda x: x[0])) + cost_by_aggregation_key = dict(sorted(cost_by_aggregation_key.items(), key=lambda x: x[0])) if last_n_days: - cost_by_day = dict(list(cost_by_day.items())[-last_n_days:]) + cost_by_aggregation_key = dict(list(cost_by_aggregation_key.items())[-last_n_days:]) - plt.bar(cost_by_day.keys(), cost_by_day.values(), width=0.5) + plt.bar(cost_by_aggregation_key.keys(), cost_by_aggregation_key.values(), width=0.5) + plt.xticks(rotation=30, fontsize=8) plt.xlabel('Day') plt.ylabel('Cost [$]') plt.title('Cost by day') - plt.show() \ No newline at end of file + plt.tight_layout() + plt.show() + + + @staticmethod + def plot_cost_by_day(path: str = DEFAULT_LOG_PATH, last_n_days: int = None) -> None: + """Plot the cost by day of all the logs in the directory. + + Args: + path (str, optional): Cost logs directory. Defaults to DEFAULT_LOG_PATH. + This method reads all the files in the specified directory. + last_n_days (int, optional): The number of last days to plot. Defaults to None. + """ + OpenAICostLoggerViz.plot_cost_by_strftime( + path=path, + strftime_aggregator="%Y-%m-%d", + last_n_days=last_n_days + ) \ No newline at end of file