From a40ed1ebd44c3d90eac2189ac95562ad2bb86ef3 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Tue, 5 Nov 2024 17:24:30 +0000 Subject: [PATCH] Peteish curve fitting --- .../scaling_laws/download_wandb_logs.py | 56 +++++-- olmo/scaling/scaling_laws/merge_wandb_logs.py | 30 ++++ olmo/scaling/scaling_laws/utils.py | 33 +++- scripts/ladder_peteish.py | 1 + scripts/scaling/final_peteish.json | 62 ++++++++ scripts/scaling/final_peteish.sh | 12 ++ scripts/scaling/joint.py | 39 ++--- scripts/scaling/joint_lr.py | 74 +++++---- scripts/scaling/joint_lr_minus.py | 112 +++++++++++++ .../scaling/joint_lr_power_minus_powerd.py | 73 +++++---- scripts/scaling/joint_peteish.json | 149 ++++++++++++++++++ scripts/scaling/joint_peteish_const.json | 37 +++++ scripts/scaling/residue.py | 32 ++-- scripts/scaling/residue_peteish.json | 142 +++++++++++++++++ 14 files changed, 731 insertions(+), 121 deletions(-) create mode 100644 olmo/scaling/scaling_laws/merge_wandb_logs.py create mode 100644 scripts/scaling/final_peteish.json create mode 100644 scripts/scaling/final_peteish.sh create mode 100644 scripts/scaling/joint_lr_minus.py create mode 100644 scripts/scaling/joint_peteish.json create mode 100644 scripts/scaling/joint_peteish_const.json create mode 100644 scripts/scaling/residue_peteish.json diff --git a/olmo/scaling/scaling_laws/download_wandb_logs.py b/olmo/scaling/scaling_laws/download_wandb_logs.py index 8858ecfcc..ca0a67d03 100644 --- a/olmo/scaling/scaling_laws/download_wandb_logs.py +++ b/olmo/scaling/scaling_laws/download_wandb_logs.py @@ -52,8 +52,9 @@ def get_runs(run_paths: List) -> List: def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("-n", "--wandb-names", type=str, nargs="+", required=True, help="Full run name or regex") - parser.add_argument("-x", "--x-axis", type=str, default="throughput/total_tokens", help="X axis") + parser.add_argument("-x", "--x-axis", type=str, default="_step", help="X axis") parser.add_argument("-y", "--y-axis", nargs="+", type=str, default=["train/Perplexity"], help="Y axis") + parser.add_argument("-e", "--eval-only", action="store_true") parser.add_argument( "-o", "--output-path", @@ -96,6 +97,13 @@ def main(args): + [f"eval/downstream/{d}" for d in downstream_newline] ) + if not args.eval_only: + args.y_axis += [ + "throughput/total_tokens", + "throughput/total_training_Gflops", + "optim/learning_rate_group0", + ] + wb_runs = get_runs(args.wandb_names) print("Downloading the data from the following wandb runs:\n", "\n".join([str(run) for run in wb_runs])) @@ -104,23 +112,14 @@ def main(args): if dirname: os.makedirs(dirname, exist_ok=True) with open(args.output_path, "w") as file_ref: - writer = csv.DictWriter( - file_ref, - fieldnames=[args.x_axis] - + ["throughput/total_training_Gflops"] - + args.y_axis - + ["optim/learning_rate_group0", "learning_rate_peak", "batch_size_in_tokens"], - ) + writer = csv.DictWriter(file_ref, fieldnames=[args.x_axis] + args.y_axis + ["learning_rate_peak", "batch_size_in_tokens"]) writer.writeheader() rows = [] for wb_run in tqdm(wb_runs): print(f"Processing {wb_run.name}") history = wb_run.scan_history( - keys=[args.x_axis] - + ["throughput/total_training_Gflops"] - + args.y_axis - + ["optim/learning_rate_group0"], + keys=[args.x_axis] + args.y_axis, page_size=10000, ) # page_size cannot be too big, it will make it faster but it will start to downsample @@ -130,10 +129,10 @@ def main(args): ) for wb_step in history: - rows.append(wb_step) wb_step["learning_rate_peak"] = 
config["optimizer"]["value"]["learning_rate"] # With certain run restarts, we also update the batch size. wb_step["batch_size_in_tokens"] = batch_size_in_tokens + rows.append(wb_step) row_by_key = {} for row in rows: @@ -246,6 +245,37 @@ def main(args): # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/amberish-rulebased-3B-2xC' -y eval/validation-and-bpb-and-downstream -o wandb/amberish-rulebased/3B-2xC.csv # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/amberish-rulebased-3B-5xC' -y eval/validation-and-bpb-and-downstream -o wandb/amberish-rulebased/3B-5xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-medium/peteish7' -y eval/downstream/arc_easy_acc -o wandb/peteish7_train.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-medium/peteish7-eval' -y eval/validation-and-bpb-and-downstream -o wandb/peteish7_eval_final.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-medium/peteish7-eval' -y eval/validation-and-bpb-and-downstream -e -o wandb/peteish7_eval_full.csv + + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-190M-1xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/190M-1xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-370M-1xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/370M-1xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-600M-1xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/600M-1xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-760M-1xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/760M-1xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-1B-1xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/1B-1xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-190M-2xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/190M-2xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-370M-2xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/370M-2xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-600M-2xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/600M-2xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-760M-2xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/760M-2xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-1B-2xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/1B-2xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-190M-5xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/190M-5xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-370M-5xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/370M-5xC.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-600M-5xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/600M-5xC.csv + # 
python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-760M-5xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/760M-5xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-1B-5xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/1B-5xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-190M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/190M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-370M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/370M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-600M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/600M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-760M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/760M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-1B-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/1B-10xC.csv
+
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-const-190M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-const/190M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-const-370M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-const/370M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-const-600M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-const/600M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-const-760M-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-const/760M-10xC.csv
+    # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-const-1B-10xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-const/1B-10xC.csv
+
     args = parse_args()
     print(args)
     main(args)
diff --git a/olmo/scaling/scaling_laws/merge_wandb_logs.py b/olmo/scaling/scaling_laws/merge_wandb_logs.py
new file mode 100644
index 000000000..77e9335e5
--- /dev/null
+++ b/olmo/scaling/scaling_laws/merge_wandb_logs.py
@@ -0,0 +1,30 @@
+import csv
+import sys
+# Merge the throughput and LR columns of a train-run CSV into an eval-only CSV, keyed on _step.
+train_path = sys.argv[1]
+eval_path = sys.argv[2]
+
+train_row_by_step = {}
+with open(train_path, 'r') as f:
+    reader = csv.DictReader(f)
+    for row in reader:
+        step = int(row['_step'])
+        train_row_by_step[step] = row
+
+# Keep only the eval rows whose step also appears in the train CSV, and copy the train columns over.
+rows = []
+with open(eval_path, 'r') as f:
+    reader = csv.DictReader(f)
+    fieldnames = reader.fieldnames
+    for row in reader:
+        step = int(row['_step'])
+        if step in train_row_by_step:
+            train_row = train_row_by_step[step]
+            train_row = {k: train_row[k] for k in ["throughput/total_tokens", "throughput/total_training_Gflops", "optim/learning_rate_group0"]}
+            row.update(train_row)
+            rows.append(row)
+
+with open(eval_path, 'w') as f:
+    writer = csv.DictWriter(f, fieldnames=rows[0].keys() if rows else fieldnames)
+    writer.writeheader()
+    for row in rows:
+        writer.writerow(row)
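Usage note for the merge script above: download_wandb_logs.py is run once against the train run (keyed on _step) and once with -e against the eval run, and merge_wandb_logs.py then copies the throughput/LR columns into the eval CSV in place (train CSV first, eval CSV second). Pairing the two peteish7 CSVs produced by the commands above would presumably look like this; the exact pairing is an assumption inferred from those commands:

    python olmo/scaling/scaling_laws/merge_wandb_logs.py wandb/peteish7_train.csv wandb/peteish7_eval_full.csv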
diff --git a/olmo/scaling/scaling_laws/utils.py b/olmo/scaling/scaling_laws/utils.py
index 0958b18d5..67b640e44 100644
--- a/olmo/scaling/scaling_laws/utils.py
+++ b/olmo/scaling/scaling_laws/utils.py
@@ -290,7 +290,7 @@ class FinalConfig:
     "all-val-lm": [f"eval/{val}/CrossEntropyLoss" for val in validation],
     "all-bpb": [f"eval/downstream_bpb/{task}_bpb" for task in downstream_bpb],
     "c4": ["eval/c4_en-validation/CrossEntropyLoss"],
-    "mmlu": [
+    "mmlu-var": [
         f"eval/downstream_bpb/{task}_bpb"
         for task in [
             "mmlu_stem_var_bpb",
@@ -299,6 +299,10 @@ class FinalConfig:
             "mmlu_other_var_bpb",
         ]
     ],
+    "mmlu-stem-var": ["eval/downstream_bpb/mmlu_stem_var_bpb_bpb"],
+    "mmlu-humanities-var": ["eval/downstream_bpb/mmlu_humanities_var_bpb_bpb"],
+    "mmlu-social-sciences-var": ["eval/downstream_bpb/mmlu_social_sciences_var_bpb_bpb"],
+    "mmlu-other-var": ["eval/downstream_bpb/mmlu_other_var_bpb_bpb"],
     "hellaswag-5shot": ["eval/downstream_bpb/hellaswag_rc_5shot_bpb_bpb"],
     "arc-e-5shot": ["eval/downstream_bpb/arc_easy_rc_5shot_bpb_bpb"],
     "arc-c-5shot": ["eval/downstream_bpb/arc_challenge_rc_5shot_bpb_bpb"],
@@ -311,6 +315,12 @@ class FinalConfig:
     "csqa-5shot": ["eval/downstream_bpb/csqa_rc_5shot_bpb_bpb"],
     "socialiqa-5shot": ["eval/downstream_bpb/socialiqa_rc_5shot_bpb_bpb"],
 }
+WEIGHT_BY_KEY = {
+    "eval/downstream_bpb/mmlu_stem_var_bpb_bpb": 0.215,
+    "eval/downstream_bpb/mmlu_humanities_var_bpb_bpb": 0.335,
+    "eval/downstream_bpb/mmlu_social_sciences_var_bpb_bpb": 0.219,
+    "eval/downstream_bpb/mmlu_other_var_bpb_bpb": 0.231,
+}
 
 
 def parse_args():
@@ -364,7 +374,7 @@ def get_data_by_name(configs: Dict[str, ExtrapolateNConfig], keys: List[str], mi
             last_fake_lr = fake_lr
             last_d = d
             encountered_ds.add(d)
-            y = np.mean([float(row[key]) for key in keys])
+            y = np.average([float(row[key]) for key in keys], weights=[WEIGHT_BY_KEY.get(key, 1.0) for key in keys])
             if min_step is not None and d < min_step * batch_size:
                 continue
             data_by_name[name]["ns"].append(n)
@@ -388,7 +398,7 @@ def get_final_data_by_name(configs, keys, num_to_avg=1):
         ds, ys = [], []
         for row in rows:
             d = int(float(row["throughput/total_tokens"]))
-            y = np.mean([float(row[key]) for key in keys])
+            y = np.average([float(row[key]) for key in keys], weights=[WEIGHT_BY_KEY.get(key, 1.0) for key in keys])
             ds.append(d)
             ys.append(y)
         d = np.mean(ds)
@@ -497,6 +507,23 @@ def grad_chinchilla_n_d_lr_fit(x, p):
     return [grad_a, grad_b, grad_alpha, grad_beta, grad_E, grad_F]
 
 
+# x[0] = n, x[1] = d, x[2] = h
+# p[0] = a = log(A), p[1] = b = log(B), p[2] = alpha, p[3] = beta, p[4] = E, p[5] = F
+def chinchilla_n_d_lr_minus_fit(x, p):
+    # return e**a / x[0]**alpha + e**b / x[1]**beta + E - F * (1 - x[2])
+    return np.exp(p[0]) / x[0] ** p[2] + np.exp(p[1]) / x[1] ** p[3] + p[4] - p[5] * (1 - x[2])
+
+
+def grad_chinchilla_n_d_lr_minus_fit(x, p):
+    grad_a = np.exp(p[0]) / x[0] ** p[2]
+    grad_b = np.exp(p[1]) / x[1] ** p[3]
+    grad_alpha = np.exp(p[0]) * (-np.log(x[0])) / x[0] ** p[2]
+    grad_beta = np.exp(p[1]) * (-np.log(x[1])) / x[1] ** p[3]
+    grad_E = 1
+    grad_F = -(1 - x[2])
+    return [grad_a, grad_b, grad_alpha, grad_beta, grad_E, grad_F]
+
+
 def chinchilla_n_d_lr_log_fit(x, p):
     # return e**a / x[0]**alpha + e**b / x[1]**beta + E + F * x[2] * np.log(x[0] / e**r + s)
     return (
diff --git a/scripts/ladder_peteish.py b/scripts/ladder_peteish.py
index 06fd24a22..ee7dd5490 100644
--- a/scripts/ladder_peteish.py
+++ b/scripts/ladder_peteish.py
@@ -300,6 +300,7 @@ def config_from_args(args: argparse.Namespace) -> TrainConfig:
             label="all-small-ppl-validation",
             data=DataConfig(
                 drop_last=True,
+                memmap_dtype="uint32",
                 datasets={
                     "c4_en-validation": [
                         f"{read_location}/eval-data/perplexity/v3_small_dolma2-tokenizer/c4_en/val/part-0-00000.npy"
diff --git a/scripts/scaling/final_peteish.json b/scripts/scaling/final_peteish.json
new file mode 100644
index 000000000..a6e2539be
--- /dev/null
+++ 
b/scripts/scaling/final_peteish.json @@ -0,0 +1,62 @@ +{ + "190m": { + "paths": [ + "wandb/peteish-final/190M-1xC.csv", + "wandb/peteish-final/190M-2xC.csv", + "wandb/peteish-final/190M-5xC.csv", + "wandb/peteish-final/190M-10xC.csv" + ], + "mode": "train", + "n": 190354176, + "label": "190m", + "color": "darkred" + }, + "370m": { + "paths": [ + "wandb/peteish-final/370M-1xC.csv", + "wandb/peteish-final/370M-2xC.csv", + "wandb/peteish-final/370M-5xC.csv", + "wandb/peteish-final/370M-10xC.csv" + ], + "mode": "train", + "n": 371262464, + "label": "370m", + "color": "darkorange" + }, + "600m": { + "paths": [ + "wandb/peteish-final/600M-1xC.csv", + "wandb/peteish-final/600M-2xC.csv", + "wandb/peteish-final/600M-5xC.csv", + "wandb/peteish-final/600M-10xC.csv" + ], + "mode": "train", + "n": 597382464, + "label": "600m", + "color": "goldenrod" + }, + "760m": { + "paths": [ + "wandb/peteish-final/760M-1xC.csv", + "wandb/peteish-final/760M-2xC.csv", + "wandb/peteish-final/760M-5xC.csv", + "wandb/peteish-final/760M-10xC.csv" + ], + "mode": "train", + "n": 758220288, + "label": "760m", + "color": "darkgreen" + }, + "1b": { + "paths": [ + "wandb/peteish-final/1B-1xC.csv", + "wandb/peteish-final/1B-2xC.csv", + "wandb/peteish-final/1B-5xC.csv", + "wandb/peteish-final/1B-10xC.csv" + ], + "mode": "train", + "n": 1279395840, + "label": "1b", + "color": "teal" + } +} \ No newline at end of file diff --git a/scripts/scaling/final_peteish.sh b/scripts/scaling/final_peteish.sh new file mode 100644 index 000000000..ba71d4482 --- /dev/null +++ b/scripts/scaling/final_peteish.sh @@ -0,0 +1,12 @@ +python scripts/scaling/final.py -k mmlu-var -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_mmlu-var.png +python scripts/scaling/final.py -k hellaswag-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_hellaswag-5shot.png +python scripts/scaling/final.py -k arc-e-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_arc-e-5shot.png +python scripts/scaling/final.py -k arc-c-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_arc-c-5shot.png +python scripts/scaling/final.py -k piqa-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_piqa-5shot.png +python scripts/scaling/final.py -k winogrande-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_winogrande-5shot.png +python scripts/scaling/final.py -k openbookqa-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_openbookqa-5shot.png +python scripts/scaling/final.py -k boolq-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_boolq-5shot.png +python scripts/scaling/final.py -k sciq-0shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_sciq-0shot.png +python scripts/scaling/final.py -k copa-0shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_copa-0shot.png +python scripts/scaling/final.py -k csqa-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_csqa-5shot.png +python scripts/scaling/final.py -k socialiqa-5shot -c scripts/scaling/final_peteish.json -o figure/peteish-final/final_socialiqa-5shot.png diff --git a/scripts/scaling/joint.py b/scripts/scaling/joint.py index 278b24ca0..59a6e0ce3 100644 --- a/scripts/scaling/joint.py +++ b/scripts/scaling/joint.py @@ -22,12 +22,12 @@ def main(): configs = json.load(f) configs = {name: ExtrapolateNConfig(**config) for name, config in configs.items()} - data_by_name = get_data_by_name(configs, 
args.keys, min_step=3000) + data_by_name = get_data_by_name(configs, args.keys, min_step=5000) sns.set_style("whitegrid") num_axs = 5 - fig, axs = plt.subplots(1, num_axs, figsize=(num_axs * 6, 4.5)) + fig, axs = plt.subplots(1, num_axs, figsize=(num_axs * 4, 3)) train_ndhs, train_ys = [], [] for name, data in data_by_name.items(): @@ -42,7 +42,7 @@ def main(): train_ys, chinchilla_n_d_fit, grad_chinchilla_n_d_fit, - p0=[4.0, 15.0, 0.25, 0.7, 1.5], + p0=[4.0, 4.0, 0.3, 0.3, 0.5], bounds=[(0, None), (0, None), (0, None), (0, None), (0, None)], ) a, b, alpha, beta, E = coefficients @@ -63,7 +63,7 @@ def main(): config = configs[name] ax = axs[get_ax(name)] ax.scatter( - data["ds"], data["ys"], color="white", edgecolors=config.color, label=config.label, s=5, alpha=0.4 + data["ds"], data["ys"], color="white", edgecolors=config.color, label=config.label, s=5, alpha=0.25 ) # plot the fitted curve @@ -89,7 +89,7 @@ def main(): all_rel_errors += rel_errors rel_error = np.mean(rel_errors) ax.annotate( - f"err: {rel_error:.2%}", + f"{rel_error:.2%}", xy=(data["ds"][-1], pred_data["ys"][-1]), xycoords="data", xytext=(-10, 8), @@ -97,27 +97,22 @@ def main(): fontsize=9, color=config.color, ) - axs[3].annotate( - f"L(N, D) = {A:.2f} / N^{alpha:.2f} + {B:.2f} / D^{beta:.2f} + {E:.2f}\nAvg err: {np.mean(all_rel_errors):.2%}", - xy=(0.15, 0.55), - xycoords="axes fraction", - fontsize=9, - ) - plt.text( - x=0.40, - y=0.90, - s=f"L(n, d) = {A:.2f} / n^{alpha:.2f} + {B:.2f} / d^{beta:.2f} + {E:.2f}", - fontsize=12, - transform=fig.transFigure, - ) + # axs[3].annotate( + # f"L(N, D) = {A:.2f} / N^{alpha:.2f} + {B:.2f} / D^{beta:.2f} + {E:.2f}\nAvg err: {np.mean(all_rel_errors):.2%}", + # xy=(0.15, 0.55), + # xycoords="axes fraction", + # fontsize=7, + # ) for ax in axs: - ax.legend(loc="upper right", ncols=2, fontsize=8) + ax.legend(loc="upper right", ncols=2, fontsize=7) ax.set_xlabel("Tokens (D)") axs[0].set_ylabel(f"CE loss, {args.key if args.key != '' else args.keys}") - axs[3].set_ylabel("Loss") - axs[3].set_title(args.key) - plt.suptitle("Fitting loss curves") + axs[3].set_title(args.key, fontsize=10) + plt.suptitle( + f"{args.key}\nL(N, D, H) = {A:.2f} / N^{alpha:.2f} + {B:.2f} / D^{beta:.2f} + {E:.2f}", + fontsize=8, + ) plt.savefig(args.output_path, dpi=300, bbox_inches="tight") diff --git a/scripts/scaling/joint_lr.py b/scripts/scaling/joint_lr.py index c020790b1..86bc38eb7 100644 --- a/scripts/scaling/joint_lr.py +++ b/scripts/scaling/joint_lr.py @@ -2,6 +2,7 @@ import matplotlib.pyplot as plt import numpy as np +import seaborn as sns from olmo.scaling.scaling_laws.utils import ( ExtrapolateNConfig, @@ -21,10 +22,12 @@ def main(): configs = json.load(f) configs = {name: ExtrapolateNConfig(**config) for name, config in configs.items()} - data_by_name = get_data_by_name(configs, args.keys, min_step=3000) + data_by_name = get_data_by_name(configs, args.keys, min_step=5000) + + sns.set_style("whitegrid") num_axs = 5 - fig, axs = plt.subplots(1, num_axs, figsize=(num_axs * 8, 6)) + fig, axs = plt.subplots(1, num_axs, figsize=(num_axs * 4, 3)) train_ndhs, train_ys = [], [] for name, data in data_by_name.items(): @@ -39,8 +42,8 @@ def main(): train_ys, chinchilla_n_d_lr_fit, grad_chinchilla_n_d_lr_fit, - p0=[4.0, 15.0, 0.25, 0.7, 1.5, 0.05], - bounds=[(None, None), (None, None), (0, None), (0, None), (0, None), (0, None)], + p0=[4.0, 4.0, 0.3, 0.3, 0.5, 0.0], + bounds=[(None, None), (None, None), (0, None), (0, None), (0, None), (None, None)], ) a, b, alpha, beta, E, F = coefficients A, B = np.exp(a), 
np.exp(b) @@ -62,43 +65,46 @@ def main(): for name, data in data_by_name.items(): config = configs[name] ax = axs[get_ax(name)] - ax.scatter(data["ds"], data["ys"], color="white", edgecolors=config.color, label=config.label, s=5.0) + ax.scatter(data["ds"], data["ys"], color="white", edgecolors=config.color, label=config.label, s=10, alpha=0.25) # plot the fitted curve for name, data in predicted_data_by_name.items(): config = configs[name] ax = axs[get_ax(name)] - if config.mode == "train": - ax.plot( - data["ds"], - data["ys"], - color=config.color, - linestyle="--", - linewidth=0.8, - label=f"{config.label} (fitted)", - ) - else: - ax.plot( - data["ds"], - data["ys"], - color=config.color, - linestyle="--", - linewidth=0.8, - label=f"{config.label} (predicted)", - ) - plt.text( - x=0.40, - y=0.90, - s=f"L(n, d, h) = {A:.2f} / n^{alpha:.2f} + {B:.2f} / d^{beta:.2f} + {E:.2f} + {F:.2f} * h", - fontsize=12, - transform=fig.transFigure, - ) + ax.plot( + data["ds"], + data["ys"], + color=config.color, + linestyle="--", + linewidth=1.5, + label=f'{config.label} ({"fitted" if config.mode == "train" else "predicted"})', + ) + + # annotate the error + for name, data in data_by_name.items(): + config = configs[name] + ax = axs[get_ax(name)] + pred_data = predicted_data_by_name[name] + rel_errors = [np.abs((pred_y - y) / y) for y, pred_y in zip(data["ys"], pred_data["ys"])] + rel_error = np.mean(rel_errors) + ax.annotate( + f"{rel_error:.2%}", + xy=(data["ds"][-1], pred_data["ys"][-1]), + xycoords="data", + xytext=(-4, 8), + textcoords="offset points", + fontsize=9, + color=config.color, + ) for ax in axs: - ax.legend(loc="upper right", ncols=2, fontsize=10) - ax.set_xlabel("Tokens (d)") - axs[0].set_ylabel(f"CE loss, {args.key if args.key != '' else args.keys}") - plt.suptitle("Fitting loss curves, with LR correction") + ax.legend(loc="upper right", ncols=1, fontsize=7) + ax.set_xlabel("Tokens (D)") + axs[0].set_ylabel("Loss") + plt.suptitle( + f"{args.key}\nL(N, D, H) = {A:.2f} / N^{alpha:.2f} + {B:.2f} / D^{beta:.2f} + {E:.2f} + {F:.2f} * H", + fontsize=10, + ) plt.savefig(args.output_path, dpi=300, bbox_inches="tight") diff --git a/scripts/scaling/joint_lr_minus.py b/scripts/scaling/joint_lr_minus.py new file mode 100644 index 000000000..0b4fa8802 --- /dev/null +++ b/scripts/scaling/joint_lr_minus.py @@ -0,0 +1,112 @@ +import json + +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns + +from olmo.scaling.scaling_laws.utils import ( + ExtrapolateNConfig, + chinchilla_n_d_lr_minus_fit, + get_ax, + get_coefficients_huber, + get_data_by_name, + grad_chinchilla_n_d_lr_minus_fit, + parse_args, +) + + +def main(): + args = parse_args() + + with open(args.config_path) as f: + configs = json.load(f) + configs = {name: ExtrapolateNConfig(**config) for name, config in configs.items()} + + data_by_name = get_data_by_name(configs, args.keys, min_step=5000) + + sns.set_style("whitegrid") + + num_axs = 5 + fig, axs = plt.subplots(1, num_axs, figsize=(num_axs * 4, 3)) + + train_ndhs, train_ys = [], [] + for name, data in data_by_name.items(): + config = configs[name] + if config.mode == "train": + train_ndhs += [[n, d, h] for n, d, h in zip(data["ns"], data["ds"], data["hs"])] + train_ys += data["ys"] + + # fit the parameters + coefficients = get_coefficients_huber( + train_ndhs, + train_ys, + chinchilla_n_d_lr_minus_fit, + grad_chinchilla_n_d_lr_minus_fit, + p0=[4.0, 4.0, 0.3, 0.3, 0.5, 0.0], + bounds=[(None, None), (None, None), (0, None), (0, None), (0, None), (None, None)], + ) + a, 
b, alpha, beta, E, F = coefficients
+    A, B = np.exp(a), np.exp(b)
+
+    # make predictions
+    predicted_data_by_name = {}
+    for name, data in data_by_name.items():
+        config = configs[name]
+        predicted_data_by_name[name] = {
+            "ns": data["ns"],
+            "ds": data["ds"],
+            "ys": [
+                chinchilla_n_d_lr_minus_fit([n, d, h], coefficients)
+                for n, d, h in zip(data["ns"], data["ds"], data["hs"])
+            ],
+        }
+
+    # plot the actual data
+    for name, data in data_by_name.items():
+        config = configs[name]
+        ax = axs[get_ax(name)]
+        ax.scatter(data["ds"], data["ys"], color="white", edgecolors=config.color, label=config.label, s=10, alpha=0.25)
+
+    # plot the fitted curve
+    for name, data in predicted_data_by_name.items():
+        config = configs[name]
+        ax = axs[get_ax(name)]
+        ax.plot(
+            data["ds"],
+            data["ys"],
+            color=config.color,
+            linestyle="--",
+            linewidth=1.5,
+            label=f'{config.label} ({"fitted" if config.mode == "train" else "predicted"})',
+        )
+
+    # annotate the error
+    for name, data in data_by_name.items():
+        config = configs[name]
+        ax = axs[get_ax(name)]
+        pred_data = predicted_data_by_name[name]
+        rel_errors = [np.abs((pred_y - y) / y) for y, pred_y in zip(data["ys"], pred_data["ys"])]
+        rel_error = np.mean(rel_errors)
+        ax.annotate(
+            f"{rel_error:.2%}",
+            xy=(data["ds"][-1], pred_data["ys"][-1]),
+            xycoords="data",
+            xytext=(-4, 8),
+            textcoords="offset points",
+            fontsize=9,
+            color=config.color,
+        )
+
+    for ax in axs:
+        ax.legend(loc="upper right", ncols=1, fontsize=7)
+        ax.set_xlabel("Tokens (D)")
+    axs[0].set_ylabel("Loss")
+    plt.suptitle(
+        f"{args.key}\nL(N, D, H) = {A:.2f} / N^{alpha:.2f} + {B:.2f} / D^{beta:.2f} + {E:.2f} - {F:.2f} * (1 - H)",
+        fontsize=10,
+    )
+    plt.savefig(args.output_path, dpi=300, bbox_inches="tight")
+
+
+if __name__ == "__main__":
+    main()
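For intuition, the "minus" LR correction fitted above, L(n, d, h) = e^a / n^alpha + e^b / d^beta + E - F * (1 - h), can be exercised end to end on synthetic data. A minimal sketch: scipy.optimize stands in for get_coefficients_huber, whose internals are not shown in this patch, and the data points, Huber delta, and optimizer choice are illustrative assumptions.

    import numpy as np
    from scipy.optimize import minimize

    def lr_minus_fit(x, p):
        # Same model as chinchilla_n_d_lr_minus_fit above; h = x[2] is the LR decay factor (1.0 at peak LR).
        return np.exp(p[0]) / x[0] ** p[2] + np.exp(p[1]) / x[1] ** p[3] + p[4] - p[5] * (1 - x[2])

    # Synthetic (n, d, h) points and noisy losses generated from known parameters.
    rng = np.random.default_rng(0)
    ndhs = [(n, d, h) for n in (190e6, 370e6, 1.3e9) for d, h in ((2e9, 1.0), (4e9, 0.5), (8e9, 0.0))]
    true_p = [4.0, 4.0, 0.3, 0.3, 0.5, 0.1]
    ys = [lr_minus_fit(x, true_p) + rng.normal(0.0, 1e-3) for x in ndhs]

    def huber_objective(p, delta=1e-3):
        # Huber loss over residuals: quadratic near zero, linear in the tails.
        r = np.array([lr_minus_fit(x, p) - y for x, y in zip(ndhs, ys)])
        return np.sum(np.where(np.abs(r) < delta, 0.5 * r ** 2, delta * (np.abs(r) - 0.5 * delta)))

    res = minimize(huber_objective, x0=[4.0, 4.0, 0.3, 0.3, 0.5, 0.0], method="L-BFGS-B")
    print(res.x)  # roughly recovers true_p; d and h are correlated here, so F is only loosely pinned down

The x0 matches the p0 used by these scripts; the parameter bounds they pass are omitted here for brevity.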
label=f"{config.label} (fitted)", - ) - else: - ax.plot( - data["ds"], - data["ys"], - color=config.color, - linestyle="--", - linewidth=0.8, - label=f"{config.label} (predicted)", - ) - plt.text( - x=0.40, - y=0.90, - s=f"L(n, d, h) = {A:.2f} / n^{alpha:.2f} + {B:.2f} / d^{beta:.2f} + {E:.2f} - {F:.2f} * (1 - h) * n^{gamma:.2f} * d^{delta:.2f}", - fontsize=12, - transform=fig.transFigure, - ) + ax.plot( + data["ds"], + data["ys"], + color=config.color, + linestyle="--", + linewidth=1.5, + label=f'{config.label} ({"fitted" if config.mode == "train" else "predicted"})', + ) + + # annotate the error + for name, data in data_by_name.items(): + config = configs[name] + ax = axs[get_ax(name)] + pred_data = predicted_data_by_name[name] + rel_errors = [np.abs((pred_y - y) / y) for y, pred_y in zip(data["ys"], pred_data["ys"])] + rel_error = np.mean(rel_errors) + ax.annotate( + f"{rel_error:.2%}", + xy=(data["ds"][-1], pred_data["ys"][-1]), + xycoords="data", + xytext=(-4, 8), + textcoords="offset points", + fontsize=9, + color=config.color, + ) for ax in axs: - ax.legend(loc="upper right", ncols=2, fontsize=10) - ax.set_xlabel("Tokens (d)") - axs[0].set_ylabel(f"CE loss, {args.key if args.key != '' else args.keys}") - plt.suptitle("Fitting loss curves, with LR power minus powerd correction") + ax.legend(loc="upper right", ncols=1, fontsize=7) + ax.set_xlabel("Tokens (D)") + axs[0].set_ylabel("Loss") + plt.suptitle( + f"{args.key}\nL(N, D, H) = {A:.2f} / N^{alpha:.2f} + {B:.2f} / D^{beta:.2f} + {E:.2f} - {F:.2f} * H * N^{gamma:.2f} * D^{delta:.2f}", + fontsize=10, + ) plt.savefig(args.output_path, dpi=300, bbox_inches="tight") diff --git a/scripts/scaling/joint_peteish.json b/scripts/scaling/joint_peteish.json new file mode 100644 index 000000000..203cfdecd --- /dev/null +++ b/scripts/scaling/joint_peteish.json @@ -0,0 +1,149 @@ +{ + "190m-1xC": { + "path": "wandb/peteish-final/190M-1xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-1xC", + "color": "darkred" + }, + "190m-2xC": { + "path": "wandb/peteish-final/190M-2xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-2xC", + "color": "darkred" + }, + "190m-5xC": { + "path": "wandb/peteish-final/190M-5xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-5xC", + "color": "darkred" + }, + "190m-10xC": { + "path": "wandb/peteish-final/190M-10xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-10xC", + "color": "darkred" + }, + "370m-1xC": { + "path": "wandb/peteish-final/370M-1xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-1xC", + "color": "darkorange" + }, + "370m-2xC": { + "path": "wandb/peteish-final/370M-2xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-2xC", + "color": "darkorange" + }, + "370m-5xC": { + "path": "wandb/peteish-final/370M-5xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-5xC", + "color": "darkorange" + }, + "370m-10xC": { + "path": "wandb/peteish-final/370M-10xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-10xC", + "color": "darkorange" + }, + "600m-1xC": { + "path": "wandb/peteish-final/600M-1xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-1xC", + "color": "goldenrod" + }, + "600m-2xC": { + "path": "wandb/peteish-final/600M-2xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-2xC", + "color": "goldenrod" + }, + "600m-5xC": { + "path": "wandb/peteish-final/600M-5xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-5xC", + "color": "goldenrod" + }, + "600m-10xC": { + "path": 
"wandb/peteish-final/600M-10xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-10xC", + "color": "goldenrod" + }, + "760m-1xC": { + "path": "wandb/peteish-final/760M-1xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-1xC", + "color": "darkgreen" + }, + "760m-2xC": { + "path": "wandb/peteish-final/760M-2xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-2xC", + "color": "darkgreen" + }, + "760m-5xC": { + "path": "wandb/peteish-final/760M-5xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-5xC", + "color": "darkgreen" + }, + "760m-10xC": { + "path": "wandb/peteish-final/760M-10xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-10xC", + "color": "darkgreen" + }, + "1b-1xC": { + "path": "wandb/peteish-final/1B-1xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-1xC", + "color": "teal" + }, + "1b-2xC": { + "path": "wandb/peteish-final/1B-2xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-2xC", + "color": "teal" + }, + "1b-5xC": { + "path": "wandb/peteish-final/1B-5xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-5xC", + "color": "teal" + }, + "1b-10xC": { + "path": "wandb/peteish-final/1B-10xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-10xC", + "color": "teal" + }, + "7b-5T": { + "path": "wandb/peteish7_eval_full.csv", + "mode": "eval", + "n": 6887575552, + "label": "7b-5T", + "color": "darkviolet" + } +} \ No newline at end of file diff --git a/scripts/scaling/joint_peteish_const.json b/scripts/scaling/joint_peteish_const.json new file mode 100644 index 000000000..1e2460aad --- /dev/null +++ b/scripts/scaling/joint_peteish_const.json @@ -0,0 +1,37 @@ +{ + "190m-10xC": { + "path": "wandb/peteish-const/190M-10xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-10xC", + "color": "darkred" + }, + "370m-10xC": { + "path": "wandb/peteish-const/370M-10xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-10xC", + "color": "darkorange" + }, + "600m-10xC": { + "path": "wandb/peteish-const/600M-10xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-10xC", + "color": "goldenrod" + }, + "760m-10xC": { + "path": "wandb/peteish-const/760M-10xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-10xC", + "color": "darkgreen" + }, + "1b-10xC": { + "path": "wandb/peteish-const/1B-10xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-10xC", + "color": "teal" + } +} \ No newline at end of file diff --git a/scripts/scaling/residue.py b/scripts/scaling/residue.py index dac3c2fb1..77244c8af 100644 --- a/scripts/scaling/residue.py +++ b/scripts/scaling/residue.py @@ -32,7 +32,9 @@ def parse_args(): args.keys = [f"eval/{val}/CrossEntropyLoss" for val in validation] elif args.key == "all-bpb": args.keys = [f"eval/downstream_bpb/{task}_bpb" for task in downstream_bpb] - elif args.key == "mmlu-var-bpb": + elif args.key == "hellaswag-5shot": + args.keys = [f"eval/downstream_bpb/hellaswag_rc_5shot_bpb_bpb"] + elif args.key == "mmlu-var": args.keys = [ f"eval/downstream_bpb/{task}_bpb" for task in [ @@ -53,7 +55,7 @@ def parse_args(): 10: "*", } -NS = [151898880, 319980544, 530074944, 681297408, 1176832000] +NS = [190354176, 371262464, 597382464, 758220288, 1279395840] def func_pow_r(x, p): # x = (n, d), p = (U0, U1, U2, U3, U4, r) @@ -201,10 +203,10 @@ def main(): configs = json.load(f) configs = {name: ExtrapolateNConfig(**config) for name, config in configs.items()} - data_by_name = get_data_by_name(configs, args.keys, min_step=3000) + data_by_name = 
get_data_by_name(configs, args.keys, min_step=5000) const_configs = { name: ExtrapolateNConfig( - path=config.path.replace("5shot", "const") + path=config.path.replace("final", "const") .replace("1xC", "10xC") .replace("2xC", "10xC") .replace("5xC", "10xC"), @@ -215,7 +217,7 @@ def main(): ) for name, config in configs.items() } - const_data_by_name = get_data_by_name(const_configs, args.keys, min_step=3000) + const_data_by_name = get_data_by_name(const_configs, args.keys, min_step=5000) sns.set_style("whitegrid") @@ -240,15 +242,16 @@ def main(): color="white", edgecolors=config.color, label=config.label, - s=5.0, + s=5, + alpha=0.4, ) WARMUP_D_BY_N = { - 151898880: 150208512, - 319980544: 300154880, - 530074944: 530317312, - 681297408: 750256128, - 1176832000: 1000603648, + 190354176: 190354176, + 371262464: 371262464, + 597382464: 597382464, + 758220288: 758220288, + 1279395840: 1279395840, } # overlay a cosine curve @@ -277,7 +280,7 @@ def main(): cosine_ys, color=config.color, linestyle="--", - linewidth=1.0, + linewidth=1.5, ) # # overlay an s2 curve @@ -296,12 +299,13 @@ def main(): rangee_by_ndc[(data["ns"][0], ds[-1], c, name)] = rangee for ax in axs: - ax.set_ylim(-0.20, 0.02) + ax.set_ylim(-0.045, 0.01) ax.legend(loc="upper right", ncols=1, fontsize=8) ax.set_xlabel("Tokens (D)") axs[0].set_ylabel("Residue") - plt.suptitle("Residue of loss against curve of const LR schedule") + plt.suptitle(f"{args.key}") plt.savefig(args.output_path, dpi=300, bbox_inches="tight") + exit() bounds: List[Tuple[Any, Any]] # plot the rangee diff --git a/scripts/scaling/residue_peteish.json b/scripts/scaling/residue_peteish.json new file mode 100644 index 000000000..b0843fc12 --- /dev/null +++ b/scripts/scaling/residue_peteish.json @@ -0,0 +1,142 @@ +{ + "190m-1xC": { + "path": "wandb/peteish-final/190M-1xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-1xC", + "color": "darkred" + }, + "190m-2xC": { + "path": "wandb/peteish-final/190M-2xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-2xC", + "color": "darkred" + }, + "190m-5xC": { + "path": "wandb/peteish-final/190M-5xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-5xC", + "color": "darkred" + }, + "190m-10xC": { + "path": "wandb/peteish-final/190M-10xC.csv", + "mode": "train", + "n": 190354176, + "label": "190m-10xC", + "color": "darkred" + }, + "370m-1xC": { + "path": "wandb/peteish-final/370M-1xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-1xC", + "color": "darkorange" + }, + "370m-2xC": { + "path": "wandb/peteish-final/370M-2xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-2xC", + "color": "darkorange" + }, + "370m-5xC": { + "path": "wandb/peteish-final/370M-5xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-5xC", + "color": "darkorange" + }, + "370m-10xC": { + "path": "wandb/peteish-final/370M-10xC.csv", + "mode": "train", + "n": 371262464, + "label": "370m-10xC", + "color": "darkorange" + }, + "600m-1xC": { + "path": "wandb/peteish-final/600M-1xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-1xC", + "color": "goldenrod" + }, + "600m-2xC": { + "path": "wandb/peteish-final/600M-2xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-2xC", + "color": "goldenrod" + }, + "600m-5xC": { + "path": "wandb/peteish-final/600M-5xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-5xC", + "color": "goldenrod" + }, + "600m-10xC": { + "path": "wandb/peteish-final/600M-10xC.csv", + "mode": "train", + "n": 597382464, + "label": "600m-10xC", + 
"color": "goldenrod" + }, + "760m-1xC": { + "path": "wandb/peteish-final/760M-1xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-1xC", + "color": "darkgreen" + }, + "760m-2xC": { + "path": "wandb/peteish-final/760M-2xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-2xC", + "color": "darkgreen" + }, + "760m-5xC": { + "path": "wandb/peteish-final/760M-5xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-5xC", + "color": "darkgreen" + }, + "760m-10xC": { + "path": "wandb/peteish-final/760M-10xC.csv", + "mode": "train", + "n": 758220288, + "label": "760m-10xC", + "color": "darkgreen" + }, + "1b-1xC": { + "path": "wandb/peteish-final/1B-1xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-1xC", + "color": "teal" + }, + "1b-2xC": { + "path": "wandb/peteish-final/1B-2xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-2xC", + "color": "teal" + }, + "1b-5xC": { + "path": "wandb/peteish-final/1B-5xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-5xC", + "color": "teal" + }, + "1b-10xC": { + "path": "wandb/peteish-final/1B-10xC.csv", + "mode": "train", + "n": 1279395840, + "label": "1b-10xC", + "color": "teal" + } +} \ No newline at end of file