Skip to content

Commit

Permalink
Add leaderboard conversion script
Browse files Browse the repository at this point in the history
  • Loading branch information
Fabian Wolf committed Oct 24, 2024
1 parent 6aa032e commit 8fc7c88
Showing 1 changed file with 136 additions and 0 deletions.
136 changes: 136 additions & 0 deletions utils/leaderboard_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import json
import os
import pandas as pd
import zipfile
import re
from io import StringIO
import ast

def load_csvs_from_zip(zip_path):
    """
    Load the LLM-output and metrics CSV files from a ZIP archive.

    A member is selected by the prefix of its *file name* (the part after the
    last "/"), so the CSVs are found both at the archive root and inside a
    folder. The first matching member in archive order wins.

    Parameters:
        zip_path (str): Path to the ZIP file

    Returns:
        tuple: (llm_output_df, metrics_df). Each element is a pandas
        DataFrame, or None when the archive has no member with the
        corresponding prefix ("llm-output-" / "metrics_").
    """

    def _find_member(member_names, prefix):
        # Return the first archive member whose base name starts with prefix.
        for member in member_names:
            # Skip directory entries and the resource-fork folder macOS adds.
            if member.endswith("/") or member.startswith("__MACOSX/"):
                continue
            if member.rsplit("/", 1)[-1].startswith(prefix):
                return member
        return None

    def _read_member(zip_ref, member):
        # Parse one archive member as CSV; None means "not in the archive".
        if member is None:
            return None
        with zip_ref.open(member) as f:
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM, which
            # would otherwise end up inside the first column name.
            content = f.read().decode('utf-8-sig')
        return pd.read_csv(StringIO(content))

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        file_list = zip_ref.namelist()
        llm_output_df = _read_member(zip_ref, _find_member(file_list, 'llm-output-'))
        metrics_df = _read_member(zip_ref, _find_member(file_list, 'metrics_'))

    return llm_output_df, metrics_df

# -------------------------------------------------------------------------------------------------------------- #
# Edit this part to your needs
# Generation Metadata
# Model name as recorded in the metadata column of the LLM output CSV; the
# script compares it against every experiment archive before using it.
model_name_experiment = "LLaMA 3.1 8B Instruct Q5_K_M"
# Hugging Face repository id; written to the leaderboard config and used
# (with "/" replaced by "_") in the output file name.
model_name_huggingface = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
model_quant = "q5km"
# Fixed: was 3, which contradicts the 8B model configured above.
model_params = 8  # in BILLION parameters

# Add all experiments for one model. Can contain multiple metrics per experiment
experiments = [
    {
        # ZIP export containing the llm-output-* and metrics_* CSV files
        "filepath": "/Users/fabianwolf/Downloads/metrics_LLaMA3.18BInstructQ5-K-M_202410141018_3j5i_Kvh0NQ.zip",
        # key of this experiment inside the "results" section of the JSON
        "name": "exp1",
        "metrics": [
            {
                "name": "accuracy",  # how the metric should be called in the leaderboard
                "csv_key": "accuracy",  # how the metric is called in the metrics csv
                "labels": "all",  # TODO: label selection is not implemented; this value is not read yet
            }
        ],
    }
]


# Fixed: pointed at ".../Download/..." while the input archive above lives
# under ".../Downloads/..."; the missing "s" made the script create a new,
# unintended top-level folder.
output_dir = "/Users/fabianwolf/Downloads/leaderboard_output"

# -------------------------------------------------------------------------------------------------------------- #


# The destination folder may not exist yet; create it (and any parents) up front.
os.makedirs(output_dir, exist_ok=True)


# Layout of the JSON document this script produces:
#
# {
#     "config": {
#         "model_dtype": "q8",
#         "model_params": 3,
#         "model_name": "bartowski/Meta-Llama-3.1-3B-Instruct-GGUF",
#         "model_sha": "",
#         "model_type": "LLM"
#     },
#     "results": {
#         "exp1": {
#             "acc": 0.9
#         },
#         "exp2": {
#             "acc_norm": 0.95
#         }
#     }
# }

# Skeleton of that document: the config section is filled from the settings
# above, "results" starts empty and receives one entry per experiment.
leaderboard_json = dict(
    config=dict(
        model_dtype=model_quant,
        model_params=model_params,
        model_name=model_name_huggingface,
        model_sha="",
        model_type="LLM",
    ),
    results=dict(),
)

# Collect the configured metrics of every experiment into the "results" section.
for experiment in experiments:
    llm_df, metrics_df = load_csvs_from_zip(experiment["filepath"])
    # The loader returns None for a CSV it could not find; fail with a clear
    # message instead of an AttributeError further down.
    if llm_df is None or metrics_df is None:
        raise FileNotFoundError(
            f"{experiment['filepath']} must contain both an 'llm-output-*' and a 'metrics_*' CSV file"
        )

    # The metadata cell holds a Python-literal dict; literal_eval parses it
    # without executing code.
    metadata = ast.literal_eval(llm_df.iloc[0]['metadata'])
    found_model_name = metadata['llm_processing']['model_name']
    # Explicit raise rather than assert, so the check also runs under `python -O`.
    if found_model_name != model_name_experiment:
        raise ValueError(
            f"Experiment '{experiment['name']}' was generated with model "
            f"'{found_model_name}', expected '{model_name_experiment}'"
        )

    # get row where id is macro_scores (same row for every metric, so look it up once)
    macro_rows = metrics_df[metrics_df["id"] == "macro_scores"]
    if macro_rows.empty:
        raise ValueError(
            f"No row with id 'macro_scores' in the metrics CSV of experiment '{experiment['name']}'"
        )
    row = macro_rows.iloc[0]

    metric_dict = {}

    for metric in experiment["metrics"]:
        csv_key = metric["csv_key"]
        # Columns are named "<label>$<metric>". Match the key as the exact suffix
        # after the "$" so that e.g. "accuracy" does not also pick up
        # "<label>$balanced_accuracy".
        suffix = "$" + csv_key
        metric_columns = [col for col in row.index if col.endswith(suffix)]
        if not metric_columns:
            # An empty selection would average to NaN and produce invalid JSON.
            raise ValueError(
                f"No column ending in '{suffix}' in the metrics CSV of experiment '{experiment['name']}'"
            )

        # Leaderboard value of the metric: mean over all matching columns.
        numeric_values = pd.to_numeric(row[metric_columns], errors='coerce')
        metric_dict[metric["name"]] = float(numeric_values.mean())

        # also add all individual values of the metric_columns to the metric_dict
        # as <metric name>_<part of the column name before the "$<csv_key>" suffix>
        for col in metric_columns:
            label = col[:-len(suffix)]
            metric_dict[metric["name"] + "_" + label] = float(row[col])

    leaderboard_json["results"][experiment["name"]] = metric_dict

# save json to file ("/" in the repository id is not allowed in a file name)
output_filename = f"leaderboard_{model_name_huggingface.replace('/', '_')}.json"
with open(os.path.join(output_dir, output_filename), "w", encoding="utf-8") as f:
    json.dump(leaderboard_json, f)

0 comments on commit 8fc7c88

Please sign in to comment.