
Commit

diverges on language model training with default beta2 in adopt, bringing it down to 0.99
lucidrains committed Nov 22, 2024
1 parent f6ab117 commit 57ffaf3
Showing 3 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion adam_atan2_pytorch/adopt.py
@@ -23,7 +23,7 @@ def __init__(
         self,
         params,
         lr = 1e-4,
-        betas: tuple[float, float] = (0.9, 0.9999),
+        betas: tuple[float, float] = (0.9, 0.99),
         eps = 1e-6,
         weight_decay = 0.,
         decoupled_wd = True
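Why the change helps: beta2 sets the decay of the second-moment EMA, and the effective averaging horizon of such an EMA is roughly 1/(1 - beta2), so 0.9999 tracks on the order of 10,000 past steps while 0.99 tracks about 100. A quick illustrative check in plain Python (not part of the commit):

    # rough effective averaging window of an EMA with decay beta2
    horizon = lambda beta2: 1. / (1. - beta2)

    print(horizon(0.9999))  # ~10000 steps - slow to adapt; per the commit, diverges on LM training
    print(horizon(0.99))    # ~100 steps - the new, more responsive default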
3 changes: 2 additions & 1 deletion adam_atan2_pytorch/adopt_atan2.py
@@ -15,6 +15,7 @@ def exists(val):
 class AdoptAtan2(Optimizer):
     """
     the proposed Adam substitute from University of Tokyo
+    combined with the proposed atan2 method for ridding of the eps from Google
     Algorithm 2 in https://arxiv.org/abs/2411.02853
     """
@@ -23,7 +23,7 @@ def __init__(
         self,
         params,
         lr = 1e-4,
-        betas: tuple[float, float] = (0.9, 0.9999),
+        betas: tuple[float, float] = (0.9, 0.99),
         weight_decay = 0.,
         decoupled_wd = True,
         a = 1.27,
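For context on the added docstring line: the atan2 method replaces Adam's eps-guarded division with a bounded arctangent, so no eps hyperparameter is needed. A minimal sketch of the idea, not the repo's exact implementation (tensor names are hypothetical; the inner scale is assumed to be 1, with a = 1.27 matching the constructor default in the diff above):

    import torch

    # hypothetical bias-corrected first/second moment estimates, for illustration
    m_hat = torch.randn(10)
    v_hat = torch.rand(10)

    # conventional Adam-style direction needs an eps to guard the division
    eps = 1e-8
    adam_update = m_hat / (v_hat.sqrt() + eps)

    # atan2 variant: bounded and eps-free; a = 1.27 as in the constructor above
    a = 1.27
    atan2_update = a * torch.atan2(m_hat, v_hat.sqrt())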
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "adam-atan2-pytorch"
-version = "0.1.5"
+version = "0.1.6"
 description = "Adam-atan2 for Pytorch"
 authors = [
     { name = "Phil Wang", email = "[email protected]" }
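As a usage note, a minimal sketch of picking up the new default; the module path and constructor arguments come from the diff above, while the toy model and training step are illustrative:

    import torch
    from adam_atan2_pytorch.adopt_atan2 import AdoptAtan2

    model = torch.nn.Linear(512, 256)

    # betas now defaults to (0.9, 0.99); passed explicitly here for clarity
    opt = AdoptAtan2(model.parameters(), lr = 1e-4, betas = (0.9, 0.99))

    loss = model(torch.randn(8, 512)).sum()
    loss.backward()
    opt.step()
    opt.zero_grad()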
