Support slice_p in Prodigy optimizer #550

Merged · 5 commits · Dec 28, 2024
4 changes: 2 additions & 2 deletions modules/ui/OptimizerParamsWindow.py
@@ -32,7 +32,7 @@ def __init__(
self.protocol("WM_DELETE_WINDOW", self.on_window_close)

self.title("Optimizer Settings")
self.geometry("800x400")
self.geometry("800x500")
self.resizable(True, True)
self.wait_visibility()
self.grab_set()
@@ -142,8 +142,8 @@ def create_dynamic_ui(
'r': {'title': 'R', 'tooltip': 'EMA factor.', 'type': 'float'},
'adanorm': {'title': 'AdaNorm', 'tooltip': 'Whether to use the AdaNorm variant', 'type': 'bool'},
'adam_debias': {'title': 'Adam Debias', 'tooltip': 'Only correct the denominator to avoid inflating step sizes early in training.', 'type': 'bool'},
'slice_p': {'title': 'Slice parameters', 'tooltip': 'Reduce memory usage by calculating LR adaptation statistics on only every pth entry of each tensor. For values greater than 1 this is an approximation to standard Prodigy. Values ~11 are reasonable.', 'type': 'int'},
'cautious': {'title': 'Cautious', 'tooltip': 'Whether to use the Cautious variant.', 'type': 'bool'},

}
# @formatter:on

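For context, here is a minimal sketch of the idea behind `slice_p` described in the tooltip above. It is not prodigyopt's actual implementation; the helper name and shapes are illustrative. The d-adaptation statistics are accumulated over only every `slice_p`-th entry of each flattened tensor, so the extra optimizer state shrinks roughly by a factor of `slice_p`, and `slice_p=1` recovers standard Prodigy.

```python
import torch

def sliced_d_stats(grad, param, param0, slice_p=11):
    # Illustrative only: accumulate Prodigy-style d-adaptation statistics
    # over every slice_p-th element of the flattened tensors. With
    # slice_p=1 this reduces to using the full tensors.
    g = grad.flatten()[::slice_p]
    x = param.flatten()[::slice_p]
    x0 = param0.flatten()[::slice_p]
    numerator = torch.dot(g, x0 - x)   # term feeding the d numerator
    denominator = g.abs().sum()        # term feeding the d denominator
    return numerator, denominator
```
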
2 changes: 2 additions & 0 deletions modules/util/config/TrainConfig.py
@@ -89,6 +89,7 @@ class TrainOptimizerConfig(BaseConfig):
r: float
adanorm: bool
adam_debias: bool
slice_p: int
cautious: bool

def __init__(self, data: list[(str, Any, type, bool)]):
@@ -157,6 +158,7 @@ def default_values():
data.append(("r", None, float, True))
data.append(("adanorm", False, bool, False))
data.append(("adam_debias", False, bool, False))
data.append(("slice_p", None, int, True))
data.append(("cautious", False, bool, False))

return TrainOptimizerConfig(data)
1 change: 1 addition & 0 deletions modules/util/create.py
@@ -829,6 +829,7 @@ def create_optimizer(
d_coef=optimizer_config.d_coef if optimizer_config.d_coef is not None else 1.0,
growth_rate=optimizer_config.growth_rate if optimizer_config.growth_rate is not None else float('inf'),
fsdp_in_use=optimizer_config.fsdp_in_use if optimizer_config.fsdp_in_use is not None else False,
slice_p=optimizer_config.slice_p if optimizer_config.slice_p is not None else 1,
)

# ADAFactor Optimizer
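For reference, a hedged usage sketch of constructing Prodigy with the new keyword (`model` is a placeholder for your module): `slice_p=11` matches the UI default added in optimizer_util.py, while `slice_p=1` disables the approximation and matches the fallback used in create.py above.

```python
from prodigyopt import Prodigy

# Illustrative construction; `model` is a placeholder for an nn.Module.
# slice_p > 1 trades a small approximation for lower optimizer-state memory;
# slice_p = 1 behaves like the original Prodigy.
optimizer = Prodigy(
    model.parameters(),
    lr=1.0,
    weight_decay=1e-2,
    slice_p=11,
)
```
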
1 change: 1 addition & 0 deletions modules/util/optimizer_util.py
@@ -269,6 +269,7 @@ def init_model_parameters(
"d_coef": 1.0,
"growth_rate": float('inf'),
"fsdp_in_use": False,
"slice_p": 11,
},
Optimizer.DADAPT_ADA_GRAD: {
"momentum": 0,
2 changes: 1 addition & 1 deletion requirements-global.txt
@@ -31,7 +31,7 @@ open-clip-torch==2.28.0
# optimizers
dadaptation==3.2 # dadaptation optimizers
lion-pytorch==0.2.2 # lion optimizer
prodigyopt==1.0 # prodigy optimizer
prodigyopt==1.1.1 # prodigy optimizer
schedulefree==1.3.0 # schedule-free optimizers
pytorch_optimizer==3.3.0 # pytorch optimizers

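The pin moves from 1.0 to 1.1.1, presumably because the `slice_p` keyword is only accepted by the newer release. A quick version guard like the following (illustrative, not part of the PR) can catch a stale environment before training starts:

```python
from importlib.metadata import version
from packaging.version import Version

# Illustrative guard: older prodigyopt builds do not accept slice_p.
installed = Version(version("prodigyopt"))
assert installed >= Version("1.1"), f"prodigyopt {installed} is too old for slice_p"
```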