Skip to content

Commit

Permalink
separate functions
Browse files Browse the repository at this point in the history
  • Loading branch information
AkshitaB committed Nov 19, 2024
1 parent e5ebb23 commit 6e3a1c2
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 101 deletions.
14 changes: 9 additions & 5 deletions olmo/scaling/scaling_laws/fitting_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,25 +328,29 @@ def sigmoid(x, a, x0, k, b):
o = a / (1 + np.exp(-k * (x - x0))) + b
return o


def sigmoid_fit(x, p):
    """Sigmoid evaluated with packed parameters ``p = [a, x0, k, b]``.

    Same curve as ``sigmoid(x, a, x0, k, b)`` but taking the coefficients
    as a single sequence, as required by the scipy-minimize fitting path.
    """
    a, x0, k, b = p[0], p[1], p[2], p[3]
    return a / (1.0 + np.exp(-k * (x - x0))) + b


def grad_sigmoid_fit(x, p):
    """Gradient of ``sigmoid_fit`` with respect to the packed parameters.

    Args:
        x: scalar (or array) input at which to evaluate the gradient.
        p: coefficient sequence ``[a, x0, k, b]``.

    Returns:
        ``[d/da, d/dx0, d/dk, d/db]`` of ``a / (1 + e^(-k (x - x0))) + b``.
    """
    # Fix: the source span contained duplicated pre/post-diff lines
    # (denom and the two squared-denominator grads each appeared twice)
    # and computed the function value `o` without using it; both removed.
    exp_term = np.exp(-p[2] * (x - p[1]))
    denom = 1 + exp_term

    grad_a = 1 / denom
    grad_x0 = p[0] * p[2] * exp_term / (denom**2)
    grad_k = p[0] * (x - p[1]) * exp_term / (denom**2)
    grad_b = 1

    return [grad_a, grad_x0, grad_k, grad_b]


def exponential_fit(x, a, b, c):
    """Exponential curve ``a * e^(b * x) + c`` with unpacked coefficients."""
    growth = np.exp(b * x)
    return a * growth + c


# Scipy minimize w/ Huber loss
def get_coefficients_huber(
train_xs,
Expand Down
1 change: 0 additions & 1 deletion olmo/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,7 +1387,6 @@ def __exit__(self, exc_type, exc_val, exc_tb) -> None:

@dataclass
class TrainerForEval(Trainer):

def close(self, exit_code: int = 0) -> None:
gc_cuda()

Expand Down
1 change: 0 additions & 1 deletion scripts/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ def dummy_init_fn(module: torch.nn.Module) -> None:
optim=optim,
scheduler=scheduler,
) as trainer:

log.info(f"Loading checkpoint from {load_path}...")
trainer.restore_checkpoint(
load_path,
Expand Down
4 changes: 0 additions & 4 deletions scripts/scaling/stacked.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def main():
pred_n = parse_size(args.target_n)
pred_d = parse_length(args.target_d, pred_n)


num_tasks = len(args.keys)
fig, axes = plt.subplots(num_tasks, 3, figsize=(6 * 3, 4.5 * num_tasks), squeeze=False)

Expand Down Expand Up @@ -391,8 +390,5 @@ def main():
print(results)





if __name__ == "__main__":
main()
11 changes: 6 additions & 5 deletions scripts/scaling/step1.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@ def fit_step1(data_by_name, y_metric):
bounds=bounds,
max_iter=1000000,
disp=False,
return_cov=True
return_cov=True,
)
else:
raise ValueError(f"Unknown y_metric: {y_metric}")

return coefficients, cov


def predict_step1(data_by_name, coefficients, y_metric):
def predict_step1(configs, data_by_name, coefficients, y_metric):
predicted_data_by_name = {}
plotted_predicted_data_by_name = {}

Expand All @@ -109,7 +109,7 @@ def predict_step1(data_by_name, coefficients, y_metric):
"ys": [func([n, d], coefficients) for n, d in zip(ns, ds)],
}

if data["mode"] == "eval":
if configs[name].mode == "eval":
predicted_data = predicted_data_by_name[name]
for d, y, y_pred in zip(data["ds"], data["ys"], predicted_data["ys"]):
rel_error = (y_pred - y) / y
Expand Down Expand Up @@ -216,12 +216,13 @@ def main():

# make predictions
predicted_data_by_name, plotted_predicted_data_by_name, (y, y_pred, rel_error) = predict_step1(
data_by_name, coefficients, y_metric=args.y_metric
configs, data_by_name, coefficients, y_metric=args.y_metric
)
results += f"\n{task_name} | {prettify(y, False)} | {prettify(y_pred, False)} | {prettify(rel_error)}"

if args.output_path:
plot_step1(configs,
plot_step1(
configs,
data_by_name,
predicted_data_by_name,
plotted_predicted_data_by_name,
Expand Down
212 changes: 127 additions & 85 deletions scripts/scaling/step2.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,117 @@ def fit_step2(data_by_name, task_name, y_metric):
return coefficients, cov


def predict_step2(configs, data_by_name, coefficients, cov, y_metric):
    """Evaluate the fitted sigmoid on every run's losses and collect the
    prediction error for the eval point(s).

    Args:
        configs: mapping of run name -> config object with a ``.mode``
            attribute ("train" or "eval").
        data_by_name: mapping of run name -> {"xs": losses, "ys": accuracies}.
        coefficients: fitted sigmoid parameters (a, x0, k, b).
        cov: covariance matrix of the fit, used for the 95% delta interval.
        y_metric: NOTE(review): currently unused in this function.

    Returns:
        (predicted_data_by_name, plotted_predicted_data,
         (y, y_pred, rel_error, delta_error))

    NOTE(review): y, y_pred, rel_error and delta_error are loop variables
    that leak out of the "eval" branch — the returned tuple reflects the
    LAST eval point seen, and the return raises NameError if no config has
    mode == "eval". Preserving this order-dependent behavior as-is.
    """
    predicted_data_by_name = {}
    for name, data in data_by_name.items():
        config = configs[name]
        # Model prediction at each observed loss value for this run.
        predicted_data_by_name[name] = {
            "xs": data["xs"],
            "ys": [sigmoid(x, *coefficients) for x in data["xs"]],
        }
        if config.mode == "eval":
            for x, y, y_pred in zip(data["xs"], data["ys"], predicted_data_by_name[name]["ys"]):
                rel_error = (y_pred - y) / y
                # Standard error of the fitted curve at this point; 1.96x
                # gives the half-width of a 95% confidence interval.
                std_error = get_std_errors([x], [y_pred], coefficients, cov, sigmoid_fit, grad_sigmoid_fit)[0]
                delta_error = 1.96 * std_error

    # Dense grid over the observed loss range (padded 10% on the left)
    # for drawing a smooth fitted curve.
    xmin = 0.9 * min(min(data["xs"]) for data in data_by_name.values())
    xmax = max(max(data["xs"]) for data in data_by_name.values())
    xs = np.linspace(xmin, xmax, 100)
    plotted_predicted_data = {
        "xs": xs,
        "ys": [sigmoid(x, *coefficients) for x in xs],
    }

    return predicted_data_by_name, plotted_predicted_data, (y, y_pred, rel_error, delta_error)


def plot_step2(
    configs,
    data_by_name,
    predicted_data_by_name,
    plotted_predicted_data,
    task_name,
    fit_str,
    y_metric,
    coefficients,
    cov,
    ax=None,
):
    """Plot actual vs. predicted task accuracy with a 95% prediction band.

    Args:
        configs: mapping of run name -> config with .mode/.color/.label.
        data_by_name: mapping of run name -> {"xs": losses, "ys": accuracies}.
        predicted_data_by_name: per-run model predictions (same keys/shape).
        plotted_predicted_data: dense {"xs", "ys"} grid for the fitted curve.
        task_name: title prefix for the subplot.
        fit_str: formula string appended to the title.
        y_metric: "rc_acc" or "mc_acc"; selects the y-axis label.
        coefficients, cov: fit parameters and covariance for the error band.
        ax: target axes; defaults to the current axes at CALL time.

    Raises:
        ValueError: if y_metric is not one of "rc_acc"/"mc_acc".
    """
    # BUG FIX: the previous default `ax=plt.gca()` was evaluated once at
    # function-definition time, permanently binding every default call to
    # whatever axes happened to be current at import.
    if ax is None:
        ax = plt.gca()

    std_errors = get_std_errors(
        plotted_predicted_data["xs"],
        plotted_predicted_data["ys"],
        coefficients,
        cov,
        sigmoid_fit,
        grad_sigmoid_fit,
    )

    # Compute prediction intervals (95% band around the fitted curve).
    plotted_y_lower = plotted_predicted_data["ys"] - 1.96 * std_errors
    plotted_y_upper = plotted_predicted_data["ys"] + 1.96 * std_errors

    unsigned_rel_errs = []
    for name, data in data_by_name.items():
        config = configs[name]
        predicted_data = predicted_data_by_name[name]

        ax.scatter(
            data["xs"],
            data["ys"],
            color=config.color,
            marker="o",
            s=10,
            label=f"{config.label} ({'fitted' if config.mode == 'train' else 'predicted'})",
        )
        for x, y, y_pred in zip(data["xs"], data["ys"], predicted_data["ys"]):
            rel_error = (y_pred - y) / y

            if config.mode == "train":
                # Train points contribute to the average fitting error.
                unsigned_rel_errs.append(abs(rel_error))
            else:
                # Eval points get an on-plot relative-error annotation.
                ax.annotate(
                    f"{np.abs(rel_error) * 100:.1f}%",
                    (x, y),
                    textcoords="offset points",
                    xytext=(3, 3),
                    ha="left",
                    va="bottom",
                    fontsize=8,
                    color=config.color,
                )
    avg_unsigned_rel_err = np.mean(unsigned_rel_errs)

    # plot the fitted curve
    ax.plot(
        plotted_predicted_data["xs"],
        plotted_predicted_data["ys"],
        color="black",
        linestyle="--",
        linewidth=1.5,
    )

    ax.fill_between(plotted_predicted_data["xs"], plotted_y_lower, plotted_y_upper, color="pink", alpha=0.3)

    ax.legend(loc="lower right", ncols=1, fontsize=8)
    ax.set_xlabel("Task loss")
    if y_metric == "rc_acc":
        ax.set_ylabel("Task RC accuracy")
    elif y_metric == "mc_acc":
        ax.set_ylabel("Task MC accuracy")
    else:
        # BUG FIX: was f"Invalid y_metric: {args.y_metric}" — `args` is not
        # in scope here (NameError when this branch fired); use the parameter.
        raise ValueError(f"Invalid y_metric: {y_metric}")
    ax.set_ylim([0, 1.0])
    ax.set_title(
        f"{task_name}\n{fit_str}\navg rel error on fitting = {avg_unsigned_rel_err * 100:.2f}%",
        fontsize=9,
    )


def str_sigmoid(coefficients):
    """Render a fitted sigmoid as a human-readable Acc(L) formula string."""
    a, x0, k, b = coefficients
    # Note the display order: k appears before x0 inside the exponent.
    return "Acc(L) = {:.2f} / (1 + e^(-{:.2f}(L - {:.2f}))) + {:.2f}".format(a, k, x0, b)


def main():
args = parse_args()

Expand All @@ -97,95 +208,26 @@ def main():
a, x0, k, b = coefficients

# make predictions
predicted_data_by_name = {}
for name, data in data_by_name.items():
config = configs[name]
predicted_data_by_name[name] = {
"xs": data["xs"],
"ys": [sigmoid(x, *coefficients) for x in data["xs"]],
}
xmin = 0.9 * min(min(data["xs"]) for data in data_by_name.values())
xmax = max(max(data["xs"]) for data in data_by_name.values())
xs = np.linspace(xmin, xmax, 100)
plotted_predicted_data = {
"xs": xs,
"ys": [sigmoid(x, *coefficients) for x in xs],
}

std_errors = get_std_errors(plotted_predicted_data["xs"], plotted_predicted_data["ys"], coefficients, cov, sigmoid_fit, grad_sigmoid_fit)

# Compute prediction intervals
plotted_y_lower = plotted_predicted_data["ys"] - 1.96 * std_errors
plotted_y_upper = plotted_predicted_data["ys"] + 1.96 * std_errors
predicted_data_by_name, plotted_predicted_data, (y, y_pred, rel_error, delta_error) = predict_step2(
configs, data_by_name, coefficients, cov, y_metric=args.y_metric
)

ax = axes[i // num_cols][i % num_cols]
results += f"\n{task_name} | {prettify(y, False)} | {prettify(y_pred, False)} | {prettify(rel_error)}"

# plot the actual and predicted data
unsigned_rel_errs = []
for name, data in data_by_name.items():
config = configs[name]
predicted_data = predicted_data_by_name[name]

ax.scatter(
data["xs"],
data["ys"],
color=config.color,
marker="o",
s=10,
label=f"{config.label} ({'fitted' if config.mode == 'train' else 'predicted'})",
)
for x, y, y_pred in zip(data["xs"], data["ys"], predicted_data["ys"]):
rel_error = (y_pred - y) / y
std_error = get_std_errors([x], [y_pred], coefficients, cov, sigmoid_fit, grad_sigmoid_fit)[0]
delta_error = 1.96 * std_error
y_lower = y_pred - 1.96 * std_error
y_upper = y_pred + 1.96 * std_error
rel_error_lower = (y_lower - y) / y
rel_error_upper = (y_upper - y) / y

if config.mode == "train":
unsigned_rel_errs.append(abs(rel_error))
else:
ax.annotate(
f"{np.abs(rel_error) * 100:.1f}%",
(x, y),
textcoords="offset points",
xytext=(3, 3),
ha="left",
va="bottom",
fontsize=8,
color=config.color,
)
results += (
f"\n{task_name} | {prettify(y, False)} | {prettify(y_pred, False)} ± {prettify(delta_error, False)} | {prettify(rel_error)}"
)
avg_unsigned_rel_err = np.mean(unsigned_rel_errs)

# plot the fitted curve
ax.plot(
plotted_predicted_data["xs"],
plotted_predicted_data["ys"],
color="black",
linestyle="--",
linewidth=1.5,
)

ax.fill_between(
plotted_predicted_data["xs"], plotted_y_lower, plotted_y_upper, color="pink", alpha=0.3
)
ax = axes[i // num_cols][i % num_cols]

ax.legend(loc="lower right", ncols=1, fontsize=8)
ax.set_xlabel("Task loss")
if args.y_metric == "rc_acc":
ax.set_ylabel("Task RC accuracy")
elif args.y_metric == "mc_acc":
ax.set_ylabel("Task MC accuracy")
else:
raise ValueError(f"Invalid y_metric: {args.y_metric}")
ax.set_ylim([0, 1.0])
ax.set_title(
f"{task_name}\nAcc(L) = {a:.2f} / (1 + e^(-{k:.2f}(L - {x0:.2f}))) + {b:.2f}\navg rel error on fitting = {avg_unsigned_rel_err * 100:.2f}%",
fontsize=9,
plot_step2(
configs,
data_by_name,
predicted_data_by_name,
plotted_predicted_data,
task_name,
str_sigmoid(coefficients),
args.y_metric,
coefficients,
cov,
ax=ax,
)

fig.tight_layout()
Expand Down

0 comments on commit 6e3a1c2

Please sign in to comment.