From f239f0eaf1a3ebb77efa27b4571cc3acd4ea421a Mon Sep 17 00:00:00 2001 From: kyleclo Date: Tue, 26 Nov 2024 17:51:16 -0800 Subject: [PATCH 1/5] flops by perf figure --- pyproject.toml | 3 + scripts/flops_by_perf_figure.py | 235 ++++++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+) create mode 100644 scripts/flops_by_perf_figure.py diff --git a/pyproject.toml b/pyproject.toml index d8b339c19..2b27a7074 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,9 @@ train = [ all = [ "ai2-olmo[dev,train]", ] +figures = [ + "matplotlib", +] [project.urls] Homepage = "https://github.com/allenai/OLMo" diff --git a/scripts/flops_by_perf_figure.py b/scripts/flops_by_perf_figure.py new file mode 100644 index 000000000..51499ab7c --- /dev/null +++ b/scripts/flops_by_perf_figure.py @@ -0,0 +1,235 @@ +""" + +Plotting the FLOPS by performance figure + +Don't forget to run `pip install -e '.[figures]'` to install the necessary dependencies. + +@kylel + +""" + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +RESULTS_DATA_PATH = "/Users/kylel/ai2/olmo/olmo2.csv" +OUTPUT_PATHS = ["/Users/kylel/ai2/olmo/olmo2.pdf", "/Users/kylel/ai2/olmo/olmo2.png"] +df = pd.read_csv(RESULTS_DATA_PATH) + +# don't count Model, Flops, and Average columns +num_datasets = len(df.columns) - 3 + +MODEL_COLUMN_NAME = "Model" +CATEGORY_COLUMN_NAME = "category" +FLOPS_COLUMN_NAME = "FLOPs" +METRIC_COLUMN_NAME = "Average" +COLOR_COLUMN_NAME = "color" +OFFSET_COLUMN_NAME = "label_offset" +MARKER_COLUMN_NAME = "marker" + +# remove Zamba model (SSM, not a language model) +df = df[df[MODEL_COLUMN_NAME] != "Zamba-2-7B"] + +model_name_to_open_status = { + "Amber-7B": "Other fully open", + "DCLM-7B": "Other fully open", + "Mistral-7B-v0.3": "Open weights", + "Mistral-Nemo-Bs-12B": "Open weights", + "Gemma-2-9B": "Open weights", + "Llama-2-13B": "Open weights", + "Llama-3.1-8B": "Open weights", + "MAP-Neo-7B": "Other fully open", + "Zamba-2-7B": "Partially open", + "OLMo-0424-7B": "Previous OLMo", + "OLMo-2-1124-13B": "Latest OLMo", + "OLMo-2-1124-7B": "Latest OLMo", + "OLMo-7B": "Previous OLMo", + "Qwen-2.5-14B": "Open weights", + "Qwen-2.5-7B": "Open weights", + "StableLM-2-12B": "Partially open", +} + +# Add a column for model category based on the groupings +df[CATEGORY_COLUMN_NAME] = df[MODEL_COLUMN_NAME].map(model_name_to_open_status) + +# Add a column for color based on the category +categories = df["category"].unique() +category_to_color = { + "Open weights": "#093235", # dark blue + "Partially open": "#255457", # dark green + "Other fully open": "#6FE0BA", # light green + "Previous OLMo": "#F697C4", # light pink + "Latest OLMo": "#F0529C", # dark pink +} + +df[COLOR_COLUMN_NAME] = df[CATEGORY_COLUMN_NAME].map(category_to_color) + +model_name_to_label_offset = { + "Amber-7B": [10, -2], # Move left and down to use empty space + "DCLM-7B": [-20, 10], # Move right and up into empty area + "Mistral-7B-v0.3": [-20, 8], # Move left and up + "Mistral-Nemo-Bs-12B": [20, -8], # Move right and down + "Gemma-2-9B": [-35, -15], # Move left and down + "Llama-2-13B": [0, 10], # Move right and slightly up + "Llama-3.1-8B": [0, -15], # Move right and down + "MAP-Neo-7B": [-20, -15], # Move left and down into empty space + "Zamba-2-7B": [-25, 10], # Move left and up + "OLMo-0424-7B": [-35, -15], # Move right and slightly down + "OLMo-2-1124-13B": [-20, 10], # Move left and up + "OLMo-2-1124-7B": [-35, 10], # Move right + "OLMo-7B": [-35, 10], # Move left and up into empty space + "Qwen-2.5-14B": [-40, -15], # Move right and up + "Qwen-2.5-7B": [-20, -15], # Move left and down + "StableLM-2-12B": [0, -15], # Move right and up +} + +df[OFFSET_COLUMN_NAME] = df[MODEL_COLUMN_NAME].map(model_name_to_label_offset) + +# markers +category_to_marker = { + "Open weights": "o", + "Partially open": "D", + "Other fully open": "s", + "Previous OLMo": "P", + "Latest OLMo": "*", +} + +# Clean up labels +# rename models +model_name_to_new_name = { + "OLMo-2-1124-13B": "OLMo-2-13B", + "OLMo-2-1124-7B": "OLMo-2-7B", +} +df[MODEL_COLUMN_NAME] = df[MODEL_COLUMN_NAME].replace(model_name_to_new_name) + + +# marker size +category_to_marker_size = { + "Open weights": 40, + "Partially open": 40, + "Other fully open": 70, + "Previous OLMo": 100, + "Latest OLMo": 150, +} + +# alpha +category_to_alpha = { + "Open weights": 1.0, + "Partially open": 0.7, + "Other fully open": 1.0, + "Previous OLMo": 1.0, + "Latest OLMo": 1.0, +} + + +# Scale +# plt.xscale("log") +plt.xscale("function", functions=(np.sqrt, np.square)) + + +# Plotting order +desired_order = ["Latest OLMo", "Previous OLMo", "Other fully open", "Partially open", "Open weights"] +for category in categories: + mask = (df[CATEGORY_COLUMN_NAME] == category) & (df[FLOPS_COLUMN_NAME].notna()) + data = df[mask] + plt.scatter( + data[FLOPS_COLUMN_NAME], + data[METRIC_COLUMN_NAME], + label=category, + c=data[COLOR_COLUMN_NAME], # Use the colors column + marker=category_to_marker[category], # Add the marker parameter + alpha=category_to_alpha[category], + s=category_to_marker_size[category], + ) +# Add labels for each point +FONTSIZE = 9 +for idx, row in df[df[FLOPS_COLUMN_NAME].notna()].iterrows(): + plt.annotate( + row[MODEL_COLUMN_NAME], + (row[FLOPS_COLUMN_NAME], row[METRIC_COLUMN_NAME]), + xytext=(row[OFFSET_COLUMN_NAME]), + textcoords="offset points", + fontsize=FONTSIZE, + alpha=1.0, + ) + +# x axis tick marks +tick_locations = [4e22, 6e22, 8e22, 1e23, 2e23, 4e23, 6e23, 8e23, 1e24, 2e24] + + +# Function to format numbers in scientific notation (10^x) more intuitively +def format_scientific(x): + exponent = int(np.log10(x)) # Get the exponent + mantissa = x / (10**exponent) # Get the mantissa + + # Format as "1×10²²", "2×10²²", etc. + return f"{int(mantissa)}×10{str(exponent).translate(str.maketrans('0123456789', '⁰¹²³⁴⁵⁶⁷⁸⁹'))}" + + +tick_labels = [format_scientific(x) for x in tick_locations] +plt.xticks(tick_locations, tick_labels, rotation=45, ha="right") + + +# Customize the plot +plt.xlabel("Approximate FLOPs", fontsize=12) +plt.ylabel(f"Avg Performance ({num_datasets} Benchmarks)", fontsize=12) +# plt.title("FLOPs vs Performance", fontsize=14, pad=20) + +# Add grid +plt.grid(True, which="both", ls="-", alpha=0.2) + +# Customize legend +# plt.legend(title="", title_fontsize=10, fontsize=10, bbox_to_anchor=(1.05, 1), loc="upper left") + +# Add the legend below the plot +handles, labels = plt.gca().get_legend_handles_labels() +label_to_handle = dict(zip(labels, handles)) +ordered_handles = [label_to_handle[label] for label in desired_order] +plt.legend( + ordered_handles, + desired_order, + bbox_to_anchor=(0, 1.02, 1.0, 0.2), + loc="center", + ncol=len(categories), + mode="expand", + borderaxespad=0.0, + fontsize=8, + handletextpad=0.05, # Reduce space between marker and label + columnspacing=0.5, # Adjust space between columns +) + +# Adjust the layout to prevent legend cutoff +plt.tight_layout() +plt.subplots_adjust(top=0.8) # Make room for the legend + + +# Remove spines +plt.gca().spines["top"].set_visible(False) +plt.gca().spines["right"].set_visible(False) + +# Make Yellow portion +xmin, xmax = plt.gca().get_xlim() +ymin, ymax = plt.gca().get_ylim() + +# Assuming df has columns 'x' and 'y' and is sorted by x +# Convert frontier points to polygon vertices +frontier_models = ["Amber-7B", "OLMo-0424-7B", "DCLM-7B", "OLMo-2-7B", "OLMo-2-13B", "Qwen-2.5-14B"] +frontier_df = df[df[MODEL_COLUMN_NAME].isin(frontier_models)] +frontier_df = frontier_df.set_index(MODEL_COLUMN_NAME) +frontier_df = frontier_df.reindex(frontier_models) +frontier_df = frontier_df.reset_index() + +# Create simple vertices array: +X = np.array([[xmin, ymin]]) # Start bottom-left +for _, row in frontier_df.iterrows(): + X = np.append(X, [[row[FLOPS_COLUMN_NAME], row[METRIC_COLUMN_NAME]]], axis=0) +X = np.append(X, [[xmax, ymax]], axis=0) # Top-right corner +X = np.append(X, [[xmin, ymax]], axis=0) # Back to left + +# Create and add polygon +polygon = plt.Polygon(X, facecolor="yellow", alpha=0.2, zorder=-1, edgecolor="orange", linestyle="--", linewidth=2) +plt.gca().add_patch(polygon) + +# Save the figure +for output_path in OUTPUT_PATHS: + plt.savefig(output_path, dpi=300, bbox_inches="tight") From a815e07bd7c99a745ab553c6a7240a07a7145e2a Mon Sep 17 00:00:00 2001 From: kyleclo Date: Tue, 26 Nov 2024 17:53:02 -0800 Subject: [PATCH 2/5] add example csv --- scripts/flops_by_perf_figure.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/scripts/flops_by_perf_figure.py b/scripts/flops_by_perf_figure.py index 51499ab7c..3f050a82a 100644 --- a/scripts/flops_by_perf_figure.py +++ b/scripts/flops_by_perf_figure.py @@ -6,6 +6,29 @@ @kylel + +Sample CSV file looks like: +``` +Model,FLOPs,Average,ARC Challenge,HSwag,WinoG,MMLU,DROP,NQ,AGIEval,GSM8k,MMLU Pro,TriviaQA +Amber-7B,5.091E+22,35.2,44.9,74.5,65.5,24.7,26.1,18.7,21.8,4.8,11.7,59.3 +DCLM-7B,1.033E+23,56.9,79.8,82.3,77.3,64.4,39.3,28.8,47.5,46.1,31.3,72.1 +Gemma-2-9B,4.436E+23,67.8,89.5,87.3,78.8,70.6,63,38,57.3,70.1,42,81.8 +Llama-2-13B,1.562E+23,54.1,67.3,83.9,74.9,55.7,45.6,38.4,41.5,28.1,23.9,81.3 +Llama-3.1-8B,7.227E+23,61.8,79.5,81.6,76.6,66.9,56.4,33.9,51.3,56.5,34.7,80.3 +MAP-Neo-7B,2.106E+23,49.6,78.4,72.8,69.2,58,39.4,28.9,45.8,12.5,25.9,65.1 +Mistral-7B-v0.3,,58.8,78.3,83.1,77.7,63.5,51.8,37.2,47.3,40.1,30,79.3 +Mistral-Nemo-Bs-12B,,66.9,85.2,85.6,81.5,69.5,69.2,39.7,54.7,62.1,36.7,84.6 +OLMo-0424-7B,8.679E+22,50.7,66.9,80.1,73.6,54.3,50,29.6,43.9,27.7,22.1,58.8 +OLMo-2-1124-13B,4.609E+23,68.3,83.5,86.4,81.5,67.5,70.7,46.7,54.2,75.1,35.1,81.9 +OLMo-2-1124-7B,1.771E+23,62.9,79.8,83.8,77.2,63.7,60.8,36.9,50.4,67.5,31,78 +OLMo-7B,1.018E+23,38.3,46.4,78.1,68.5,28.3,27.3,24.8,23.7,9.2,12.1,64.1 +Qwen-2.5-14B,1.595E+24,72.2,94.0,94,80,79.3,51.5,37.3,71,83.4,52.8,79.1 +Qwen-2.5-7B,8.225E+23,67.4,89.5,89.7,74.2,74.4,55.8,29.9,63.7,81.5,45.8,69.4 +StableLM-2-12B,2.929E+23,62.2,81.9,84.5,77.7,62.4,55.5,37.6,50.9,62,29.3,79.9 +Zamba-2-7B,,65.2,92.2,89.4,79.6,68.5,51.7,36.5,55.5,67.2,32.8,78.8 +``` + + """ import matplotlib.pyplot as plt From 6e00d891f48844d13cb143e9b2e01cc38144c657 Mon Sep 17 00:00:00 2001 From: kyleclo Date: Wed, 18 Dec 2024 14:17:34 -0800 Subject: [PATCH 3/5] manrope --- scripts/flops_by_perf_figure.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/scripts/flops_by_perf_figure.py b/scripts/flops_by_perf_figure.py index 3f050a82a..4bf88f88d 100644 --- a/scripts/flops_by_perf_figure.py +++ b/scripts/flops_by_perf_figure.py @@ -31,7 +31,37 @@ """ +# First clear any existing font cache +import shutil + +import matplotlib as mpl +import matplotlib.font_manager as fm import matplotlib.pyplot as plt +from matplotlib.font_manager import FontProperties + +# Find path to Manrope font on your computer +# fonts = [f for f in fm.findSystemFonts()] +# manrope_fonts = [f for f in fonts if "manrope" in f.lower()] +# for font in manrope_fonts: +# print(font) + + +cache_dir = mpl.get_cachedir() +shutil.rmtree(cache_dir) + +# Load the font file +font_path = "/Users/kylel/Library/Fonts/Manrope-VariableFont_wght.ttf" +font_prop = FontProperties(fname=font_path) + + +# Try setting weight after creation +font_prop.set_weight(500) + + +# Set it globally using the font property +plt.rcParams["font.family"] = "sans-serif" +plt.rcParams["font.sans-serif"] = [font_prop.get_name()] + import numpy as np import pandas as pd From 09730d47474e44ca98d2a13d182342937f5ad120 Mon Sep 17 00:00:00 2001 From: kyleclo Date: Wed, 18 Dec 2024 14:31:52 -0800 Subject: [PATCH 4/5] remove box legend; shift down --- scripts/flops_by_perf_figure.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/flops_by_perf_figure.py b/scripts/flops_by_perf_figure.py index 4bf88f88d..e03a51e33 100644 --- a/scripts/flops_by_perf_figure.py +++ b/scripts/flops_by_perf_figure.py @@ -46,9 +46,6 @@ # print(font) -cache_dir = mpl.get_cachedir() -shutil.rmtree(cache_dir) - # Load the font file font_path = "/Users/kylel/Library/Fonts/Manrope-VariableFont_wght.ttf" font_prop = FontProperties(fname=font_path) @@ -226,7 +223,6 @@ def format_scientific(x): # Customize the plot plt.xlabel("Approximate FLOPs", fontsize=12) plt.ylabel(f"Avg Performance ({num_datasets} Benchmarks)", fontsize=12) -# plt.title("FLOPs vs Performance", fontsize=14, pad=20) # Add grid plt.grid(True, which="both", ls="-", alpha=0.2) @@ -241,7 +237,7 @@ def format_scientific(x): plt.legend( ordered_handles, desired_order, - bbox_to_anchor=(0, 1.02, 1.0, 0.2), + bbox_to_anchor=(0, 0.97, 1.0, 0.2), loc="center", ncol=len(categories), mode="expand", @@ -249,6 +245,7 @@ def format_scientific(x): fontsize=8, handletextpad=0.05, # Reduce space between marker and label columnspacing=0.5, # Adjust space between columns + frameon=False, # Remove the legend border ) # Adjust the layout to prevent legend cutoff From e072c1a2fcd1c4c48d6a5bcf51e33d97ead41e7f Mon Sep 17 00:00:00 2001 From: Luca Soldaini Date: Wed, 18 Dec 2024 21:42:53 -0800 Subject: [PATCH 5/5] impoved look --- scripts/flops_by_perf_figure.py | 204 ++++++++++++++------------------ 1 file changed, 92 insertions(+), 112 deletions(-) diff --git a/scripts/flops_by_perf_figure.py b/scripts/flops_by_perf_figure.py index e03a51e33..0cbdfcd23 100644 --- a/scripts/flops_by_perf_figure.py +++ b/scripts/flops_by_perf_figure.py @@ -1,70 +1,25 @@ -""" - -Plotting the FLOPS by performance figure - -Don't forget to run `pip install -e '.[figures]'` to install the necessary dependencies. - -@kylel - - -Sample CSV file looks like: -``` -Model,FLOPs,Average,ARC Challenge,HSwag,WinoG,MMLU,DROP,NQ,AGIEval,GSM8k,MMLU Pro,TriviaQA -Amber-7B,5.091E+22,35.2,44.9,74.5,65.5,24.7,26.1,18.7,21.8,4.8,11.7,59.3 -DCLM-7B,1.033E+23,56.9,79.8,82.3,77.3,64.4,39.3,28.8,47.5,46.1,31.3,72.1 -Gemma-2-9B,4.436E+23,67.8,89.5,87.3,78.8,70.6,63,38,57.3,70.1,42,81.8 -Llama-2-13B,1.562E+23,54.1,67.3,83.9,74.9,55.7,45.6,38.4,41.5,28.1,23.9,81.3 -Llama-3.1-8B,7.227E+23,61.8,79.5,81.6,76.6,66.9,56.4,33.9,51.3,56.5,34.7,80.3 -MAP-Neo-7B,2.106E+23,49.6,78.4,72.8,69.2,58,39.4,28.9,45.8,12.5,25.9,65.1 -Mistral-7B-v0.3,,58.8,78.3,83.1,77.7,63.5,51.8,37.2,47.3,40.1,30,79.3 -Mistral-Nemo-Bs-12B,,66.9,85.2,85.6,81.5,69.5,69.2,39.7,54.7,62.1,36.7,84.6 -OLMo-0424-7B,8.679E+22,50.7,66.9,80.1,73.6,54.3,50,29.6,43.9,27.7,22.1,58.8 -OLMo-2-1124-13B,4.609E+23,68.3,83.5,86.4,81.5,67.5,70.7,46.7,54.2,75.1,35.1,81.9 -OLMo-2-1124-7B,1.771E+23,62.9,79.8,83.8,77.2,63.7,60.8,36.9,50.4,67.5,31,78 -OLMo-7B,1.018E+23,38.3,46.4,78.1,68.5,28.3,27.3,24.8,23.7,9.2,12.1,64.1 -Qwen-2.5-14B,1.595E+24,72.2,94.0,94,80,79.3,51.5,37.3,71,83.4,52.8,79.1 -Qwen-2.5-7B,8.225E+23,67.4,89.5,89.7,74.2,74.4,55.8,29.9,63.7,81.5,45.8,69.4 -StableLM-2-12B,2.929E+23,62.2,81.9,84.5,77.7,62.4,55.5,37.6,50.9,62,29.3,79.9 -Zamba-2-7B,,65.2,92.2,89.4,79.6,68.5,51.7,36.5,55.5,67.2,32.8,78.8 -``` - - -""" - -# First clear any existing font cache -import shutil - -import matplotlib as mpl -import matplotlib.font_manager as fm import matplotlib.pyplot as plt -from matplotlib.font_manager import FontProperties - -# Find path to Manrope font on your computer -# fonts = [f for f in fm.findSystemFonts()] -# manrope_fonts = [f for f in fonts if "manrope" in f.lower()] -# for font in manrope_fonts: -# print(font) - - -# Load the font file -font_path = "/Users/kylel/Library/Fonts/Manrope-VariableFont_wght.ttf" -font_prop = FontProperties(fname=font_path) +import argparse +import numpy as np +import pandas as pd +from matplotlib import font_manager +from cached_path import cached_path -# Try setting weight after creation -font_prop.set_weight(500) +ap = argparse.ArgumentParser() +ap.add_argument("results_data_path", type=str, help="Path to the results data CSV file.") +ap.add_argument("output_dir", type=str, help="Path to the output directory") +ap.add_argument("--manrope-medium-font-path", type=str, help="Path to the Manrope Medium font file", default="https://dolma-artifacts.org/Manrope-Medium.ttf") +args = ap.parse_args() +# Add Manrope font +font_manager.fontManager.addfont(cached_path(args.manrope_medium_font_path)) +plt.rcParams['font.family'] = 'Manrope' +plt.rcParams['font.weight'] = 'medium' -# Set it globally using the font property -plt.rcParams["font.family"] = "sans-serif" -plt.rcParams["font.sans-serif"] = [font_prop.get_name()] -import numpy as np -import pandas as pd - -RESULTS_DATA_PATH = "/Users/kylel/ai2/olmo/olmo2.csv" -OUTPUT_PATHS = ["/Users/kylel/ai2/olmo/olmo2.pdf", "/Users/kylel/ai2/olmo/olmo2.png"] -df = pd.read_csv(RESULTS_DATA_PATH) +OUTPUT_PATHS = [f"{args.output_dir}/olmo2.pdf", f"{args.output_dir}/olmo2.png"] +df = pd.read_csv(args.results_data_path) # don't count Model, Flops, and Average columns num_datasets = len(df.columns) - 3 @@ -77,6 +32,15 @@ OFFSET_COLUMN_NAME = "label_offset" MARKER_COLUMN_NAME = "marker" +AI2_YELLOW = "#fff500" +AI2_ORANGE = "#f65834" +AI2_DARK_TEAL = "#0a3235" +AI2_OFF_WHITE = "#faf2e9" +AI2_TEAL = "#105257" +AI2_PINK = "#f0529c" +AI2_PURPLE = "#b11be8" +AI2_GREEN = "#0fcb8c" + # remove Zamba model (SSM, not a language model) df = df[df[MODEL_COLUMN_NAME] != "Zamba-2-7B"] @@ -93,6 +57,8 @@ "OLMo-0424-7B": "Previous OLMo", "OLMo-2-1124-13B": "Latest OLMo", "OLMo-2-1124-7B": "Latest OLMo", + "OLMo-2-13B": "Latest OLMo", + "OLMo-2-7B": "Latest OLMo", "OLMo-7B": "Previous OLMo", "Qwen-2.5-14B": "Open weights", "Qwen-2.5-7B": "Open weights", @@ -111,26 +77,34 @@ "Previous OLMo": "#F697C4", # light pink "Latest OLMo": "#F0529C", # dark pink } +category_to_text_color = { + "Open weights": AI2_DARK_TEAL, + "Partially open": AI2_DARK_TEAL, + "Other fully open": AI2_DARK_TEAL, + "Previous OLMo": AI2_DARK_TEAL, + "Latest OLMo": "#a51c5c", # darker pink +} + df[COLOR_COLUMN_NAME] = df[CATEGORY_COLUMN_NAME].map(category_to_color) model_name_to_label_offset = { - "Amber-7B": [10, -2], # Move left and down to use empty space - "DCLM-7B": [-20, 10], # Move right and up into empty area - "Mistral-7B-v0.3": [-20, 8], # Move left and up - "Mistral-Nemo-Bs-12B": [20, -8], # Move right and down - "Gemma-2-9B": [-35, -15], # Move left and down - "Llama-2-13B": [0, 10], # Move right and slightly up - "Llama-3.1-8B": [0, -15], # Move right and down - "MAP-Neo-7B": [-20, -15], # Move left and down into empty space - "Zamba-2-7B": [-25, 10], # Move left and up - "OLMo-0424-7B": [-35, -15], # Move right and slightly down - "OLMo-2-1124-13B": [-20, 10], # Move left and up - "OLMo-2-1124-7B": [-35, 10], # Move right - "OLMo-7B": [-35, 10], # Move left and up into empty space - "Qwen-2.5-14B": [-40, -15], # Move right and up - "Qwen-2.5-7B": [-20, -15], # Move left and down - "StableLM-2-12B": [0, -15], # Move right and up + "Amber-7B": [10, -2], + "DCLM-7B": [-18, 8], + "Mistral-7B-v0.3": [-20, 8], + "Mistral-Nemo-Bs-12B": [20, -8], + "Gemma-2-9B": [-35, -15], + "Llama-2-13B": [-5, 7], + "Llama-3.1-8B": [-20, -13], + "MAP-Neo-7B": [-20, -15], + "Zamba-2-7B": [-25, 10], + "OLMo-0424-7B": [-35, -15], + "OLMo-2-1124-13B": [-20, 10], + "OLMo-2-1124-7B": [-35, 10], + "OLMo-7B": [-15, 10], + "Qwen-2.5-14B": [-40, -15], + "Qwen-2.5-7B": [-20, -15], + "StableLM-2-12B": [-20, -15], } df[OFFSET_COLUMN_NAME] = df[MODEL_COLUMN_NAME].map(model_name_to_label_offset) @@ -145,14 +119,12 @@ } # Clean up labels -# rename models model_name_to_new_name = { "OLMo-2-1124-13B": "OLMo-2-13B", "OLMo-2-1124-7B": "OLMo-2-7B", } df[MODEL_COLUMN_NAME] = df[MODEL_COLUMN_NAME].replace(model_name_to_new_name) - # marker size category_to_marker_size = { "Open weights": 40, @@ -171,12 +143,9 @@ "Latest OLMo": 1.0, } - # Scale -# plt.xscale("log") plt.xscale("function", functions=(np.sqrt, np.square)) - # Plotting order desired_order = ["Latest OLMo", "Previous OLMo", "Other fully open", "Partially open", "Open weights"] for category in categories: @@ -186,12 +155,13 @@ data[FLOPS_COLUMN_NAME], data[METRIC_COLUMN_NAME], label=category, - c=data[COLOR_COLUMN_NAME], # Use the colors column - marker=category_to_marker[category], # Add the marker parameter + c=data[COLOR_COLUMN_NAME], + marker=category_to_marker[category], alpha=category_to_alpha[category], s=category_to_marker_size[category], ) -# Add labels for each point + +# Add labels for each point with Manrope Medium FONTSIZE = 9 for idx, row in df[df[FLOPS_COLUMN_NAME].notna()].iterrows(): plt.annotate( @@ -201,34 +171,41 @@ textcoords="offset points", fontsize=FONTSIZE, alpha=1.0, + font='Manrope', + weight='medium', + color=category_to_text_color[model_name_to_open_status[row[MODEL_COLUMN_NAME]]], ) # x axis tick marks tick_locations = [4e22, 6e22, 8e22, 1e23, 2e23, 4e23, 6e23, 8e23, 1e24, 2e24] - -# Function to format numbers in scientific notation (10^x) more intuitively def format_scientific(x): - exponent = int(np.log10(x)) # Get the exponent - mantissa = x / (10**exponent) # Get the mantissa - - # Format as "1×10²²", "2×10²²", etc. + exponent = int(np.log10(x)) + mantissa = x / (10**exponent) return f"{int(mantissa)}×10{str(exponent).translate(str.maketrans('0123456789', '⁰¹²³⁴⁵⁶⁷⁸⁹'))}" - tick_labels = [format_scientific(x) for x in tick_locations] -plt.xticks(tick_locations, tick_labels, rotation=45, ha="right") +plt.xticks(tick_locations, tick_labels, rotation=45, ha="right", fontsize=8) + +# y axis tick marks +plt.yticks(fontsize=8) +# Customize the plot with Manrope Medium +plt.xlabel("Approximate FLOPs", fontsize=10, font='Manrope', weight='medium') +plt.ylabel(f"Avg Performance ({num_datasets} Benchmarks)", fontsize=10, font='Manrope', weight='medium') -# Customize the plot -plt.xlabel("Approximate FLOPs", fontsize=12) -plt.ylabel(f"Avg Performance ({num_datasets} Benchmarks)", fontsize=12) -# Add grid -plt.grid(True, which="both", ls="-", alpha=0.2) +# Add grid with custom colors +plt.grid(True, which="major", ls=":", color="#105257", alpha=0.2) +plt.grid(True, which="minor", ls="-", color="#9fbabc", alpha=0.2) -# Customize legend -# plt.legend(title="", title_fontsize=10, fontsize=10, bbox_to_anchor=(1.05, 1), loc="upper left") +# Also set the tick colors +plt.tick_params(which='major', colors='#105257') +plt.tick_params(which='minor', colors='#9fbabc') + +# If you want to change the actual axis line colors as well +plt.gca().spines['left'].set_color('#105257') +plt.gca().spines['bottom'].set_color('#105257') # Add the legend below the plot handles, labels = plt.gca().get_legend_handles_labels() @@ -242,16 +219,16 @@ def format_scientific(x): ncol=len(categories), mode="expand", borderaxespad=0.0, - fontsize=8, - handletextpad=0.05, # Reduce space between marker and label - columnspacing=0.5, # Adjust space between columns - frameon=False, # Remove the legend border + fontsize=6, + handletextpad=0.05, + columnspacing=0.5, + frameon=False, + prop={'family': 'Manrope', 'weight': 'medium', 'size': 8} ) -# Adjust the layout to prevent legend cutoff +# Adjust the layout plt.tight_layout() -plt.subplots_adjust(top=0.8) # Make room for the legend - +plt.subplots_adjust(top=0.8) # Remove spines plt.gca().spines["top"].set_visible(False) @@ -261,7 +238,6 @@ def format_scientific(x): xmin, xmax = plt.gca().get_xlim() ymin, ymax = plt.gca().get_ylim() -# Assuming df has columns 'x' and 'y' and is sorted by x # Convert frontier points to polygon vertices frontier_models = ["Amber-7B", "OLMo-0424-7B", "DCLM-7B", "OLMo-2-7B", "OLMo-2-13B", "Qwen-2.5-14B"] frontier_df = df[df[MODEL_COLUMN_NAME].isin(frontier_models)] @@ -269,15 +245,19 @@ def format_scientific(x): frontier_df = frontier_df.reindex(frontier_models) frontier_df = frontier_df.reset_index() -# Create simple vertices array: +# in order for the line not to appear at the top of the polygon, we need to offset it +polygon_line_width = 1 +polygon_offset = (ymax - ymin) * (polygon_line_width / 100) + +# Create simple vertices array X = np.array([[xmin, ymin]]) # Start bottom-left for _, row in frontier_df.iterrows(): X = np.append(X, [[row[FLOPS_COLUMN_NAME], row[METRIC_COLUMN_NAME]]], axis=0) -X = np.append(X, [[xmax, ymax]], axis=0) # Top-right corner -X = np.append(X, [[xmin, ymax]], axis=0) # Back to left +X = np.append(X, [[xmax, ymax + polygon_offset]], axis=0) # Top-right corner +X = np.append(X, [[xmin, ymax + polygon_offset]], axis=0) # Back to left # Create and add polygon -polygon = plt.Polygon(X, facecolor="yellow", alpha=0.2, zorder=-1, edgecolor="orange", linestyle="--", linewidth=2) +polygon = plt.Polygon(X, facecolor=AI2_YELLOW, alpha=0.2, zorder=-1, edgecolor=AI2_ORANGE, linestyle="--", linewidth=1.5) plt.gca().add_patch(polygon) # Save the figure