make LoRA's weight initialization overridable
Laurent2916 committed Mar 13, 2024
1 parent c1b3a52 commit b8fae60
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions src/refiners/fluxion/adapters/lora.py
@@ -14,10 +14,9 @@
 class Lora(Generic[T], fl.Chain, ABC):
     """Low-Rank Adaptation (LoRA) layer.
 
-    This layer is composed of two [`WeightedModule`][refiners.fluxion.layers.WeightedModule]:
-
-    - `down`: initialized with a random normal distribution
-    - `up`: initialized with zeros
+    This layer's purpose is to approximate a given layer by two smaller layers:
+    the [`down`][refiners.fluxion.adapters.lora.Lora.down] layer (aka A) and the [`up`][refiners.fluxion.adapters.lora.Lora.up] layer (aka B).
+    See [[arXiv:2106.09685] LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/abs/2106.09685) for more details.
 
     Note:
         This layer is not meant to be used directly.
@@ -53,7 +52,10 @@ def __init__(
             *self.lora_layers(device=device, dtype=dtype),
            fl.Multiply(scale),
         )
+        self.reset_parameters()
 
+    def reset_parameters(self) -> None:
+        """Reset the parameters of up and down layers."""
         normal_(tensor=self.down.weight, std=1 / self.rank)
         zeros_(tensor=self.up.weight)

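Since `Lora.__init__` now ends with a call to `self.reset_parameters()`, a subclass can change how the LoRA weights are initialized by overriding that method instead of re-implementing the constructor. A minimal sketch, assuming the library's `LinearLora` subclass and the standard `torch.nn.init` helpers; `KaimingLinearLora` is a hypothetical name used only for illustration, not part of this commit:

from torch.nn.init import kaiming_uniform_, zeros_

from refiners.fluxion.adapters.lora import LinearLora


class KaimingLinearLora(LinearLora):
    """Hypothetical LinearLora variant: Kaiming-uniform init for the down projection."""

    def reset_parameters(self) -> None:
        # Called by Lora.__init__ once the down/up layers have been built.
        kaiming_uniform_(tensor=self.down.weight, a=5**0.5)
        zeros_(tensor=self.up.weight)

The default normal/zeros scheme stays in `Lora.reset_parameters`, so existing subclasses keep their previous behavior; only subclasses that override the hook get a different initialization.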
