
Commit

address #33
lucidrains committed Nov 21, 2024
1 parent dee87fb commit 7f6cd75
Showing 3 changed files with 22 additions and 5 deletions.
10 changes: 8 additions & 2 deletions ema_pytorch/ema_pytorch.py
@@ -1,4 +1,5 @@
from __future__ import annotations
+from typing import Callable

from copy import deepcopy
from functools import partial
@@ -64,7 +65,7 @@ class EMA(Module):
def __init__(
self,
model: Module,
-ema_model: Module | None = None, # if your model has lazylinears or other types of non-deepcopyable modules, you can pass in your own ema model
+ema_model: Module | Callable[[], Module] | None = None, # if your model has lazylinears or other types of non-deepcopyable modules, you can pass in your own ema model
beta = 0.9999,
update_after_step = 100,
update_every = 10,
@@ -82,7 +83,7 @@ def __init__(
forward_method_names: tuple[str, ...] = (),
move_ema_to_online_device = False,
coerce_dtype = False,
-lazy_init_ema = False
+lazy_init_ema = False,
):
super().__init__()
self.beta = beta
@@ -98,6 +99,11 @@ def __init__(
else:
self.online_model = [model] # hack

+# handle callable returning ema module
+
+if callable(ema_model):
+    ema_model = ema_model()
+
# ema model

self.ema_model = None
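With this change, `ema_model` may be a zero-argument callable that builds the shadow copy, so models that cannot be deep-copied (for example ones containing lazy modules) no longer need a pre-constructed EMA instance. A minimal usage sketch, assuming the rest of the `EMA` constructor behaves as in the README; the `make_net` factory and the small `nn.Sequential` network are stand-ins:

import torch
from torch import nn
from ema_pytorch import EMA

def make_net():
    # hypothetical factory; stands in for any module that is awkward to deepcopy
    return nn.Sequential(nn.Linear(512, 512), nn.SiLU(), nn.Linear(512, 512))

net = make_net()

ema = EMA(
    net,
    ema_model = make_net,        # invoked once to construct the EMA copy, instead of deepcopy(net)
    beta = 0.9999,
    update_after_step = 100,
    update_every = 10
)

out = net(torch.randn(1, 512))   # the usual training forward/backward would go here
ema.update()                     # keep the EMA copy in sync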
15 changes: 13 additions & 2 deletions ema_pytorch/post_hoc_ema.py
@@ -1,4 +1,5 @@
from __future__ import annotations
+from typing import Callable

from pathlib import Path
from copy import deepcopy
@@ -53,7 +54,7 @@ def __init__(
model: Module,
sigma_rel: float | None = None,
gamma: float | None = None,
-ema_model: Module | None = None, # if your model has lazylinears or other types of non-deepcopyable modules, you can pass in your own ema model
+ema_model: Module | Callable[[], Module] | None = None, # if your model has lazylinears or other types of non-deepcopyable modules, you can pass in your own ema model
update_every: int = 100,
frozen: bool = False,
param_or_buffer_names_no_ema: Set[str] = set(),
@@ -74,6 +75,11 @@ def __init__(

self.online_model = [model]

+# handle callable returning ema module
+
+if callable(ema_model):
+    ema_model = ema_model()
+
# ema model

self.ema_model = ema_model
@@ -274,6 +280,7 @@ class PostHocEMA(Module):
def __init__(
self,
model: Module,
+ema_model: Callable[[], Module] | None = None,
sigma_rels: Tuple[float, ...] | None = None,
gammas: Tuple[float, ...] | None = None,
checkpoint_every_num_steps: int = 1000,
@@ -290,11 +297,13 @@ def __init__(
assert len(gammas) > 1, 'at least 2 ema models with different gammas in order to synthesize new ema models of a different gamma'
assert len(set(gammas)) == len(gammas), 'calculated gammas must be all unique'

+self.maybe_ema_model = ema_model
+
self.gammas = gammas
self.num_ema_models = len(gammas)

self._model = [model]
-self.ema_models = ModuleList([KarrasEMA(model, gamma = gamma, **kwargs) for gamma in gammas])
+self.ema_models = ModuleList([KarrasEMA(model, ema_model = ema_model, gamma = gamma, **kwargs) for gamma in gammas])

self.checkpoint_folder = Path(checkpoint_folder)
self.checkpoint_folder.mkdir(exist_ok = True, parents = True)
@@ -355,6 +364,7 @@ def synthesize_ema_model(

synthesized_ema_model = KarrasEMA(
model = self.model,
+ema_model = self.maybe_ema_model,
gamma = gamma,
**self.ema_kwargs
)
@@ -392,6 +402,7 @@ def synthesize_ema_model(

tmp_ema_model = KarrasEMA(
model = self.model,
+ema_model = self.maybe_ema_model,
gamma = gamma,
**self.ema_kwargs
)
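`PostHocEMA` now threads the same callable through to every internal `KarrasEMA`, including the ones it rebuilds inside `synthesize_ema_model`, so non-deepcopyable models also work with post-hoc synthesis. A sketch of possible usage, assuming the remaining constructor arguments (`sigma_rels`, `checkpoint_every_num_steps`, `checkpoint_folder`) and `synthesize_ema_model(sigma_rel = ...)` behave as in the README; the network factory and folder name are stand-ins:

from torch import nn
from ema_pytorch import PostHocEMA

def make_net():
    # called once per internal KarrasEMA (and again when synthesizing),
    # instead of deepcopy-ing the online model
    return nn.Sequential(nn.Linear(512, 512), nn.SiLU(), nn.Linear(512, 512))

net = make_net()

emas = PostHocEMA(
    net,
    ema_model = make_net,
    sigma_rels = (0.05, 0.28),            # two EMA profiles are checkpointed during training
    checkpoint_every_num_steps = 10,
    checkpoint_folder = './post-hoc-ema-checkpoints'
)

# ... training loop, calling emas.update() after each optimizer step ...

# afterwards, synthesize an EMA model for a new sigma_rel from the stored checkpoints
synthesized_ema = emas.synthesize_ema_model(sigma_rel = 0.15)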
2 changes: 1 addition & 1 deletion setup.py
@@ -3,7 +3,7 @@
setup(
name = 'ema-pytorch',
packages = find_packages(exclude=[]),
-version = '0.7.3',
+version = '0.7.4',
license='MIT',
description = 'Easy way to keep track of exponential moving average version of your pytorch module',
author = 'Phil Wang',
