From 19ce054c46015fddfc743a6b45c2e26b87af6365 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Sun, 17 Nov 2024 01:13:27 +0000 Subject: [PATCH] Predict for Peteish13 --- .../scaling_laws/download_wandb_logs.py | 22 +--- olmo/scaling/scaling_laws/utils.py | 120 +++++++++++------- scripts/scaling/final.json | 51 +++++--- scripts/scaling/predict.py | 4 +- scripts/scaling/step2.json | 104 +++++++++++++++ scripts/scaling/step2.py | 5 +- scripts/scaling/step2_mc.json | 19 ++- 7 files changed, 236 insertions(+), 89 deletions(-) create mode 100644 scripts/scaling/step2.json diff --git a/olmo/scaling/scaling_laws/download_wandb_logs.py b/olmo/scaling/scaling_laws/download_wandb_logs.py index 1cdbd69fd..f10ed7359 100644 --- a/olmo/scaling/scaling_laws/download_wandb_logs.py +++ b/olmo/scaling/scaling_laws/download_wandb_logs.py @@ -70,29 +70,21 @@ def main(args): if args.y_axis == ["eval/all-validation/CrossEntropyLoss"]: args.y_axis = [f"eval/{d}/CrossEntropyLoss" for d in validation] - if args.y_axis == ["eval/all-validation-and-bpb/CrossEntropyLoss"]: - args.y_axis = [f"eval/{d}/CrossEntropyLoss" for d in validation] + [ - f"eval/downstream_bpb/{d}_bpb" for d in downstream_bpb - ] - elif args.y_axis == ["eval/all-v3-validation/CrossEntropyLoss"]: args.y_axis = [f"eval/{d}/CrossEntropyLoss" for d in v3_validation] + elif args.y_axis == ["eval/all-validation-and-bpb/CrossEntropyLoss"]: + args.y_axis = [f"eval/{d}/CrossEntropyLoss" for d in validation] + downstream_bpb + elif args.y_axis == ["eval/downstream/all"]: - args.y_axis = [f"eval/downstream/{d}" for d in downstream] + args.y_axis = downstream elif args.y_axis == ["eval/validation-and-bpb-and-downstream"]: - args.y_axis = ( - [f"eval/{d}/CrossEntropyLoss" for d in validation] - + [f"eval/downstream_bpb/{d}_bpb" for d in downstream_bpb] - + [f"eval/downstream/{d}" for d in downstream] - ) + args.y_axis = [f"eval/{d}/CrossEntropyLoss" for d in validation] + downstream_bpb + downstream elif args.y_axis == ["eval/validation-and-bpb-and-downstream-newline"]: args.y_axis = ( - [f"eval/{d}/CrossEntropyLoss" for d in validation] - + [f"eval/downstream_bpb/{d}_bpb" for d in downstream_bpb] - + [f"eval/downstream/{d}" for d in downstream] + [f"eval/{d}/CrossEntropyLoss" for d in validation] + downstream_bpb + downstream + [f"eval/downstream_bpb/{d}_bpb" for d in downstream_newline_bpb] + [f"eval/downstream/{d}" for d in downstream_newline] ) @@ -252,8 +244,8 @@ def main(args): # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/amberish-rulebased-3B-5xC' -y eval/validation-and-bpb-and-downstream -o wandb/amberish-rulebased/3B-5xC.csv # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-medium/peteish7' -y eval/downstream/arc_easy_acc -o wandb/peteish7_train.csv - # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-medium/peteish7-eval' -y eval/validation-and-bpb-and-downstream -o wandb/peteish7_eval_final.csv # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-medium/peteish7-eval' -y eval/validation-and-bpb-and-downstream -e -o wandb/peteish7_eval_full.csv + # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-medium/peteish13-eval' -y eval/validation-and-bpb-and-downstream -o wandb/peteish13_eval_final.csv # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-190M-1xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/190M-1xC.csv # python olmo/scaling/scaling_laws/download_wandb_logs.py -n 'ai2-llm/olmo-ladder/peteish-final-370M-1xC' -y eval/validation-and-bpb-and-downstream -o wandb/peteish-final/370M-1xC.csv diff --git a/olmo/scaling/scaling_laws/utils.py b/olmo/scaling/scaling_laws/utils.py index ef19bf063..3bcff33e3 100644 --- a/olmo/scaling/scaling_laws/utils.py +++ b/olmo/scaling/scaling_laws/utils.py @@ -234,6 +234,16 @@ def get_accuracy_keys(tasks: Dict[str, DownstreamTaskPrediction]) -> List[str]: return accuracy_keys +def get_mc_accuracy_keys(tasks: Dict[str, DownstreamTaskPrediction]) -> List[str]: + mc_accuracy_keys: List[str] = [] + for _, task in tasks.items(): + if isinstance(task.task_mc_accuracy_key, list): + mc_accuracy_keys += task.task_mc_accuracy_key + else: + mc_accuracy_keys.append(task.task_mc_accuracy_key) + return mc_accuracy_keys + + # Special case for testing with old tokenizer: downstream_newline = [ @@ -323,9 +333,9 @@ def get_accuracy_keys(tasks: Dict[str, DownstreamTaskPrediction]) -> List[str]: "socialiqa_newline_mc_5shot_bpb", ] -tasks = {**core_5shot_tasks, **mmlu_var_tasks, **mmlu_subset_var_tasks} +tasks = {**core_5shot_tasks, **mmlu_var_tasks} downstream_bpb = get_bpb_keys(tasks) -downstream = get_accuracy_keys(tasks) +downstream = get_accuracy_keys(tasks) + get_mc_accuracy_keys(tasks) KEYS_BY_KEY = { "all-val-lm": [f"eval/{val}/CrossEntropyLoss" for val in validation], @@ -530,58 +540,76 @@ def get_step2_data_by_name(configs, task_name, y_metric="rc_acc", moving_avg=1, data_by_name: Dict = defaultdict(lambda: {"xs": [], "ys": [], "ds": [], "ns": [], "ls": []}) for name, config in configs.items(): - n = config.n - for path in config.paths: - length = get_length(path) - with open(path) as file_ref: - reader = csv.DictReader(file_ref) - rows = [row for row in reader] - xs, ys, ds, ns, ls = [], [], [], [], [] - for row in rows: - d = int(float(row["throughput/total_tokens"])) + if name == "external": + xs, ys = [], [] + for path in config.paths: + with open(path) as f: + data = json.load(f) x = np.average( - [float(row[key]) for key in loss_keys], + [float(data[key]) for key in loss_keys], weights=[WEIGHT_BY_KEY.get(key, 1.0) for key in loss_keys], ) y = np.average( - [float(row[key]) for key in accuracy_keys], + [float(data[key]) for key in accuracy_keys], weights=[WEIGHT_BY_KEY.get(key, 1.0) for key in accuracy_keys], ) xs.append(x) ys.append(y) - ds.append(d) - ns.append(n) - ls.append(length) - - if config.mode == "train": - # skip initial ckpts - - xs = xs[int(np.ceil(skip_perc * len(xs))) :] - ys = ys[int(np.ceil(skip_perc * len(ys))) :] - ds = ds[int(np.ceil(skip_perc * len(ds))) :] - ns = ns[int(np.ceil(skip_perc * len(ns))) :] - ls = ls[int(np.ceil(skip_perc * len(ls))) :] - - # apply moving_avg - xs = moving_average(xs, n=moving_avg).tolist() - # ys = ys[moving_avg-1:] - # ds = ds[moving_avg-1:] - # ns = ns[moving_avg-1:] - # ls = ls[moving_avg-1:] - - # last n points - if last_n_points > 0: - xs = xs[-last_n_points:] - ys = ys[-last_n_points:] - ds = ds[-last_n_points:] - ns = ns[-last_n_points:] - ls = ls[-last_n_points:] - - data_by_name[name]["xs"] += xs - data_by_name[name]["ys"] += ys - data_by_name[name]["ds"] += ds - data_by_name[name]["ns"] += ns - data_by_name[name]["ls"] += ls + data_by_name[name] = {"xs": xs, "ys": ys, "ds": [], "ns": [], "ls": []} + + else: + n = config.n + for path in config.paths: + length = get_length(path) + with open(path) as file_ref: + reader = csv.DictReader(file_ref) + rows = [row for row in reader] + xs, ys, ds, ns, ls = [], [], [], [], [] + for row in rows: + d = int(float(row["throughput/total_tokens"])) + x = np.average( + [float(row[key]) for key in loss_keys], + weights=[WEIGHT_BY_KEY.get(key, 1.0) for key in loss_keys], + ) + y = np.average( + [float(row[key]) for key in accuracy_keys], + weights=[WEIGHT_BY_KEY.get(key, 1.0) for key in accuracy_keys], + ) + xs.append(x) + ys.append(y) + ds.append(d) + ns.append(n) + ls.append(length) + + if config.mode == "train": + # skip initial ckpts + + xs = xs[int(np.ceil(skip_perc * len(xs))) :] + ys = ys[int(np.ceil(skip_perc * len(ys))) :] + ds = ds[int(np.ceil(skip_perc * len(ds))) :] + ns = ns[int(np.ceil(skip_perc * len(ns))) :] + ls = ls[int(np.ceil(skip_perc * len(ls))) :] + + # apply moving_avg + xs = moving_average(xs, n=moving_avg).tolist() + # ys = ys[moving_avg-1:] + # ds = ds[moving_avg-1:] + # ns = ns[moving_avg-1:] + # ls = ls[moving_avg-1:] + + # last n points + if last_n_points > 0: + xs = xs[-last_n_points:] + ys = ys[-last_n_points:] + ds = ds[-last_n_points:] + ns = ns[-last_n_points:] + ls = ls[-last_n_points:] + + data_by_name[name]["xs"] += xs + data_by_name[name]["ys"] += ys + data_by_name[name]["ds"] += ds + data_by_name[name]["ns"] += ns + data_by_name[name]["ls"] += ls data_by_name[name]["mode"] = config.mode diff --git a/scripts/scaling/final.json b/scripts/scaling/final.json index 2bcbf1073..017178756 100644 --- a/scripts/scaling/final.json +++ b/scripts/scaling/final.json @@ -1,10 +1,10 @@ { "190m": { "paths": [ - "scripts/scaling/data/peteish-final-new/190M-1xC.csv", - "scripts/scaling/data/peteish-final-new/190M-2xC.csv", - "scripts/scaling/data/peteish-final-new/190M-5xC.csv", - "scripts/scaling/data/peteish-final-new/190M-10xC.csv" + "wandb/peteish-final/190M-1xC.csv", + "wandb/peteish-final/190M-2xC.csv", + "wandb/peteish-final/190M-5xC.csv", + "wandb/peteish-final/190M-10xC.csv" ], "mode": "train", "n": 190354176, @@ -13,10 +13,10 @@ }, "370m": { "paths": [ - "scripts/scaling/data/peteish-final-new/370M-1xC.csv", - "scripts/scaling/data/peteish-final-new/370M-2xC.csv", - "scripts/scaling/data/peteish-final-new/370M-5xC.csv", - "scripts/scaling/data/peteish-final-new/370M-10xC.csv" + "wandb/peteish-final/370M-1xC.csv", + "wandb/peteish-final/370M-2xC.csv", + "wandb/peteish-final/370M-5xC.csv", + "wandb/peteish-final/370M-10xC.csv" ], "mode": "train", "n": 371262464, @@ -25,10 +25,10 @@ }, "600m": { "paths": [ - "scripts/scaling/data/peteish-final-new/600M-1xC.csv", - "scripts/scaling/data/peteish-final-new/600M-2xC.csv", - "scripts/scaling/data/peteish-final-new/600M-5xC.csv", - "scripts/scaling/data/peteish-final-new/600M-10xC.csv" + "wandb/peteish-final/600M-1xC.csv", + "wandb/peteish-final/600M-2xC.csv", + "wandb/peteish-final/600M-5xC.csv", + "wandb/peteish-final/600M-10xC.csv" ], "mode": "train", "n": 597382464, @@ -37,10 +37,10 @@ }, "760m": { "paths": [ - "scripts/scaling/data/peteish-final-new/760M-1xC.csv", - "scripts/scaling/data/peteish-final-new/760M-2xC.csv", - "scripts/scaling/data/peteish-final-new/760M-5xC.csv", - "scripts/scaling/data/peteish-final-new/760M-10xC.csv" + "wandb/peteish-final/760M-1xC.csv", + "wandb/peteish-final/760M-2xC.csv", + "wandb/peteish-final/760M-5xC.csv", + "wandb/peteish-final/760M-10xC.csv" ], "mode": "train", "n": 758220288, @@ -49,10 +49,10 @@ }, "1b": { "paths": [ - "scripts/scaling/data/peteish-final-new/1B-1xC.csv", - "scripts/scaling/data/peteish-final-new/1B-2xC.csv", - "scripts/scaling/data/peteish-final-new/1B-5xC.csv", - "scripts/scaling/data/peteish-final-new/1B-10xC.csv" + "wandb/peteish-final/1B-1xC.csv", + "wandb/peteish-final/1B-2xC.csv", + "wandb/peteish-final/1B-5xC.csv", + "wandb/peteish-final/1B-10xC.csv" ], "mode": "train", "n": 1279395840, @@ -61,11 +61,20 @@ }, "7b": { "paths": [ - "scripts/scaling/data/peteish-final-new/7B-28xC-anneal-new.csv" + "wandb/peteish7_eval_anneal.csv" ], "mode": "eval", "n": 6887575552, "label": "7b", "color": "darkviolet" + }, + "13b": { + "paths": [ + "wandb/peteish13_eval_final.csv" + ], + "mode": "eval", + "n": 13202396160, + "label": "13b", + "color": "darkmagenta" } } \ No newline at end of file diff --git a/scripts/scaling/predict.py b/scripts/scaling/predict.py index 9ee3772dd..d74da66ab 100644 --- a/scripts/scaling/predict.py +++ b/scripts/scaling/predict.py @@ -1,5 +1,7 @@ -# python scripts/scaling/predict.py -k main -c scripts/scaling/final.json -n 6887575552 -d 3945065873408 -t 7b +# python scripts/scaling/predict.py -k main -c scripts/scaling/final.json --step2-config-path scripts/scaling/step2.json -n 6887575552 -d 3945065873408 -t 7b +# python scripts/scaling/predict.py -k main -c scripts/scaling/final.json --step2-config-path scripts/scaling/step2.json -n 13202396160 -d 5000080130048 -t 13b # python scripts/scaling/predict.py -k main_mc -c scripts/scaling/final.json --step2-config-path scripts/scaling/step2_mc.json -y mc_acc -n 6887575552 -d 3945065873408 -t 7b-4T-final +# python scripts/scaling/predict.py -k main_mc -c scripts/scaling/final.json --step2-config-path scripts/scaling/step2_mc.json -y mc_acc -n 13202396160 -d 5000080130048 -t 13b-5T-final import argparse diff --git a/scripts/scaling/step2.json b/scripts/scaling/step2.json new file mode 100644 index 000000000..b45f9c6cd --- /dev/null +++ b/scripts/scaling/step2.json @@ -0,0 +1,104 @@ +{ + "190m": { + "paths": [ + "wandb/peteish-final/190M-1xC.csv", + "wandb/peteish-final/190M-2xC.csv", + "wandb/peteish-final/190M-5xC.csv", + "wandb/peteish-final/190M-10xC.csv" + ], + "mode": "train", + "n": 190354176, + "label": "190m", + "color": "darkred" + }, + "370m": { + "paths": [ + "wandb/peteish-final/370M-1xC.csv", + "wandb/peteish-final/370M-2xC.csv", + "wandb/peteish-final/370M-5xC.csv", + "wandb/peteish-final/370M-10xC.csv" + ], + "mode": "train", + "n": 371262464, + "label": "370m", + "color": "darkorange" + }, + "600m": { + "paths": [ + "wandb/peteish-final/600M-1xC.csv", + "wandb/peteish-final/600M-2xC.csv", + "wandb/peteish-final/600M-5xC.csv", + "wandb/peteish-final/600M-10xC.csv" + ], + "mode": "train", + "n": 597382464, + "label": "600m", + "color": "goldenrod" + }, + "760m": { + "paths": [ + "wandb/peteish-final/760M-1xC.csv", + "wandb/peteish-final/760M-2xC.csv", + "wandb/peteish-final/760M-5xC.csv", + "wandb/peteish-final/760M-10xC.csv" + ], + "mode": "train", + "n": 758220288, + "label": "760m", + "color": "darkgreen" + }, + "1b": { + "paths": [ + "wandb/peteish-final/1B-1xC.csv", + "wandb/peteish-final/1B-2xC.csv", + "wandb/peteish-final/1B-5xC.csv", + "wandb/peteish-final/1B-10xC.csv" + ], + "mode": "train", + "n": 1279395840, + "label": "1b", + "color": "teal" + }, + "external": { + "paths": [ + "wandb/eval_bpb_mc/allenai_OLMo-7B-0724-hf.json", + "wandb/eval_bpb_mc/allenai_OLMo-7B-hf.json", + "wandb/eval_bpb_mc/allenai_OLMo-1B-hf.json", + "wandb/eval_bpb_mc/meta-llama_Llama-3.2-3B.json", + "wandb/eval_bpb_mc/meta-llama_Llama-3.2-1B.json", + "wandb/eval_bpb_mc/meta-llama_Llama-3.1-8B.json", + "wandb/eval_bpb_mc/meta-llama_Meta-Llama-3-8B.json", + "wandb/eval_bpb_mc/Qwen_Qwen2.5-14B.json", + "wandb/eval_bpb_mc/Qwen_Qwen2.5-7B.json", + "wandb/eval_bpb_mc/Qwen_Qwen2.5-3B.json", + "wandb/eval_bpb_mc/Qwen_Qwen2.5-1.5B.json", + "wandb/eval_bpb_mc/Qwen_Qwen2-7B.json", + "wandb/eval_bpb_mc/Qwen_Qwen2-1.5B.json", + "wandb/eval_bpb_mc/mistralai_Mistral-Nemo-Base-2407.json", + "wandb/eval_bpb_mc/mistralai_Mistral-7B-v0.3.json", + "wandb/eval_bpb_mc/mistralai_Mistral-7B-v0.1.json" + ], + "mode": "train", + "n": 0, + "label": "external", + "color": "silver" + }, + "7b": { + "paths": [ + "wandb/peteish7_eval_anneal.csv" + ], + "mode": "eval", + "n": 6887575552, + "label": "7b", + "color": "darkviolet" + }, + "13b": { + "paths": [ + "wandb/peteish13_eval_final.csv" + ], + "mode": "eval", + "n": 13202396160, + "label": "13b", + "color": "darkmagenta" + } +} \ No newline at end of file diff --git a/scripts/scaling/step2.py b/scripts/scaling/step2.py index 2e828e61e..52f25aba8 100644 --- a/scripts/scaling/step2.py +++ b/scripts/scaling/step2.py @@ -1,3 +1,6 @@ +# python scripts/scaling/step2.py -k main -c scripts/scaling/step2.json -o figure/peteish-final/step2_main.png +# python scripts/scaling/step2.py -k main_mc -c scripts/scaling/step2_mc.json -o figure/peteish-final/step2_mc_main.png -y mc_acc + import argparse import matplotlib.pyplot as plt @@ -173,7 +176,7 @@ def main(): plotted_predicted_data["xs"], plotted_y_lower, plotted_y_upper, color="pink", alpha=0.3 ) # , label="95% Prediction Interval") - ax.legend(loc="upper right", ncols=1, fontsize=8) + ax.legend(loc="lower right", ncols=1, fontsize=8) ax.set_xlabel("Task loss") if args.y_metric == "rc_acc": ax.set_ylabel("Task RC accuracy") diff --git a/scripts/scaling/step2_mc.json b/scripts/scaling/step2_mc.json index aec1b09e0..1aef30124 100644 --- a/scripts/scaling/step2_mc.json +++ b/scripts/scaling/step2_mc.json @@ -1,12 +1,12 @@ { - "7b-4T-70k-300k": { + "7b-4T-70k-end": { "paths": [ - "wandb/peteish7_eval_full_70k-300k.csv" + "wandb/peteish7_eval_70k-end.csv" ], "mode": "train", "n": 6887575552, - "label": "7b-4T-70k-300k", - "color": "darkviolet" + "label": "7b-4T-70k-end", + "color": "violet" }, "7b-4T-final": { "paths": [ @@ -15,6 +15,15 @@ "mode": "eval", "n": 6887575552, "label": "7b-4T-final", - "color": "magenta" + "color": "darkviolet" + }, + "13b-5T-final": { + "paths": [ + "wandb/peteish13_eval_final.csv" + ], + "mode": "eval", + "n": 13202396160, + "label": "13b-5T-final", + "color": "darkmagenta" } } \ No newline at end of file