diff --git a/src/config/create_train_config.py b/src/config/create_train_config.py index 8f83f5d..f7a544e 100644 --- a/src/config/create_train_config.py +++ b/src/config/create_train_config.py @@ -12,7 +12,7 @@ for key in model_train_configs.keys(): model_train_configs[key]["max_learning_rate"] *= 0.08 -model_train_configs["125m"]["max_learning_rate"] = 5e-4 +model_train_configs["125m"]["max_learning_rate"] = 1.0e-3 model_train_configs["1.3b"]["warmup_steps"] = 2000 model_train_configs["1.3b"]["max_learning_rate"] = 1.0e-5