Support slice_p in Prodigy optimizer #550

Merged · 5 commits · Dec 28, 2024
4 changes: 2 additions & 2 deletions modules/ui/OptimizerParamsWindow.py
@@ -32,7 +32,7 @@ def __init__(
self.protocol("WM_DELETE_WINDOW", self.on_window_close)

self.title("Optimizer Settings")
self.geometry("800x400")
self.geometry("800x500")
self.resizable(True, True)
self.wait_visibility()
self.grab_set()
@@ -142,8 +142,8 @@ def create_dynamic_ui(
'r': {'title': 'R', 'tooltip': 'EMA factor.', 'type': 'float'},
'adanorm': {'title': 'AdaNorm', 'tooltip': 'Whether to use the AdaNorm variant', 'type': 'bool'},
'adam_debias': {'title': 'Adam Debias', 'tooltip': 'Only correct the denominator to avoid inflating step sizes early in training.', 'type': 'bool'},
'slice_p': {'title': 'Slice parameters', 'tooltip': 'Reduce memory usage by calculating LR adaptation statistics on only every pth entry of each tensor. For values greater than 1 this is an approximation to standard Prodigy. Values ~11 are reasonable.', 'type': 'int'},
'cautious': {'title': 'Cautious', 'tooltip': 'Whether to use the Cautious variant.', 'type': 'bool'},

}
# @formatter:on

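For context, here is a minimal sketch of the idea behind `slice_p` described in the tooltip above. It is not prodigyopt's actual implementation; the helper name and shapes are illustrative. The d-adaptation statistics are accumulated over only every `slice_p`-th entry of each flattened tensor, so the extra optimizer state shrinks roughly by a factor of `slice_p`, and `slice_p=1` recovers standard Prodigy.

```python
import torch

def sliced_d_stats(grad, param, param0, slice_p=11):
    # Illustrative only: accumulate Prodigy-style d-adaptation statistics
    # over every slice_p-th element of the flattened tensors. With
    # slice_p=1 this reduces to using the full tensors.
    g = grad.flatten()[::slice_p]
    x = param.flatten()[::slice_p]
    x0 = param0.flatten()[::slice_p]
    numerator = torch.dot(g, x0 - x)   # term feeding the d numerator
    denominator = g.abs().sum()        # term feeding the d denominator
    return numerator, denominator
```
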
2 changes: 2 additions & 0 deletions modules/util/config/TrainConfig.py
@@ -89,6 +89,7 @@ class TrainOptimizerConfig(BaseConfig):
r: float
adanorm: bool
adam_debias: bool
slice_p: int
cautious: bool

def __init__(self, data: list[(str, Any, type, bool)]):
@@ -157,6 +158,7 @@ def default_values():
data.append(("r", None, float, True))
data.append(("adanorm", False, bool, False))
data.append(("adam_debias", False, bool, False))
data.append(("slice_p", None, int, True))
data.append(("cautious", False, bool, False))

return TrainOptimizerConfig(data)
1 change: 1 addition & 0 deletions modules/util/create.py
@@ -829,6 +829,7 @@ def create_optimizer(
d_coef=optimizer_config.d_coef if optimizer_config.d_coef is not None else 1.0,
growth_rate=optimizer_config.growth_rate if optimizer_config.growth_rate is not None else float('inf'),
fsdp_in_use=optimizer_config.fsdp_in_use if optimizer_config.fsdp_in_use is not None else False,
slice_p=optimizer_config.slice_p if optimizer_config.slice_p is not None else 1,
)

# ADAFactor Optimizer
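For reference, a hedged usage sketch of constructing Prodigy with the new keyword (`model` is a placeholder for your module): `slice_p=11` matches the UI default added in optimizer_util.py, while `slice_p=1` disables the approximation and matches the fallback used in create.py above.

```python
from prodigyopt import Prodigy

# Illustrative construction; `model` is a placeholder for an nn.Module.
# slice_p > 1 trades a small approximation for lower optimizer-state memory;
# slice_p = 1 behaves like the original Prodigy.
optimizer = Prodigy(
    model.parameters(),
    lr=1.0,
    weight_decay=1e-2,
    slice_p=11,
)
```
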
1 change: 1 addition & 0 deletions modules/util/optimizer_util.py
@@ -269,6 +269,7 @@ def init_model_parameters(
"d_coef": 1.0,
"growth_rate": float('inf'),
"fsdp_in_use": False,
"slice_p": 11,
},
Optimizer.DADAPT_ADA_GRAD: {
"momentum": 0,
2 changes: 1 addition & 1 deletion requirements-global.txt
@@ -31,7 +31,7 @@ open-clip-torch==2.28.0
# optimizers
dadaptation==3.2 # dadaptation optimizers
lion-pytorch==0.2.2 # lion optimizer
prodigyopt==1.0 # prodigy optimizer
prodigyopt==1.1.1 # prodigy optimizer
schedulefree==1.3.0 # schedule-free optimizers
pytorch_optimizer==3.3.0 # pytorch optimizers

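The pin moves from 1.0 to 1.1.1, presumably because the `slice_p` keyword is only accepted by the newer release. A quick version guard like the following (illustrative, not part of the PR) can catch a stale environment before training starts:

```python
from importlib.metadata import version
from packaging.version import Version

# Illustrative guard: older prodigyopt builds do not accept slice_p.
installed = Version(version("prodigyopt"))
assert installed >= Version("1.1"), f"prodigyopt {installed} is too old for slice_p"
```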