make LoRA's weight initialization overridable
Laurent2916 committed Mar 13, 2024
1 parent c1b3a52 commit b8fae60
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions src/refiners/fluxion/adapters/lora.py
@@ -14,10 +14,9 @@
 class Lora(Generic[T], fl.Chain, ABC):
     """Low-Rank Adaptation (LoRA) layer.
 
-    This layer is composed of two [`WeightedModule`][refiners.fluxion.layers.WeightedModule]:
-
-    - `down`: initialized with a random normal distribution
-    - `up`: initialized with zeros
+    This layer's purpose is to approximate a given layer by two smaller layers:
+    the [`down`][refiners.fluxion.adapters.lora.Lora.down] layer (aka A) and the [`up`][refiners.fluxion.adapters.lora.Lora.up] layer (aka B).
+    See [[arXiv:2106.09685] LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/abs/2106.09685) for more details.
 
     Note:
         This layer is not meant to be used directly.
@@ -53,7 +52,10 @@ def __init__(
             *self.lora_layers(device=device, dtype=dtype),
            fl.Multiply(scale),
         )
+        self.reset_parameters()
 
+    def reset_parameters(self) -> None:
+        """Reset the parameters of up and down layers."""
         normal_(tensor=self.down.weight, std=1 / self.rank)
         zeros_(tensor=self.up.weight)

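Since `Lora.__init__` now ends with a call to `self.reset_parameters()`, a subclass can change how the LoRA weights are initialized by overriding that method instead of re-implementing the constructor. A minimal sketch, assuming the library's `LinearLora` subclass and the standard `torch.nn.init` helpers; `KaimingLinearLora` is a hypothetical name used only for illustration, not part of this commit:

from torch.nn.init import kaiming_uniform_, zeros_

from refiners.fluxion.adapters.lora import LinearLora


class KaimingLinearLora(LinearLora):
    """Hypothetical LinearLora variant: Kaiming-uniform init for the down projection."""

    def reset_parameters(self) -> None:
        # Called by Lora.__init__ once the down/up layers have been built.
        kaiming_uniform_(tensor=self.down.weight, a=5**0.5)
        zeros_(tensor=self.up.weight)

The default normal/zeros scheme stays in `Lora.reset_parameters`, so existing subclasses keep their previous behavior; only subclasses that override the hook get a different initialization.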
