diff --git a/src/layers/rotary.jl b/src/layers/rotary.jl index f7f8d08d13..8c90bf68a0 100644 --- a/src/layers/rotary.jl +++ b/src/layers/rotary.jl @@ -1,19 +1,19 @@ """ Rotary Position Embeddings (RoPE) -This is a port of the RoPE implementation from NeuralAttentionlib.jl, which is an implementation of +This is a simplified port of the RoPE implementation from NeuralAttentionlib.jl, which is an implementation of the Rotary Position Embeddings (RoPE) described in the RoFormer paper. Original sources: - Paper: "RoFormer: Enhanced Transformer with Rotary Position Embedding" Authors: Jianlin Su, Yu Lu, Shengfeng Pan, Ahmed Murtadha, Bo Wen URL: https://arxiv.org/abs/2104.09864 - + - Code: NeuralAttentionlib.jl Author: chengchingwen Repository: https://github.com/chengchingwen/NeuralAttentionlib.jl -RoPE encodes absolute positional information with a rotation matrix that naturally +RoPE encodes absolute positional information with a rotation matrix that naturally incorporates explicit relative position dependency in self-attention formulation. 
""" diff --git a/test/layers/rotary.jl b/test/layers/rotary.jl new file mode 100644 index 0000000000..8fcb98105a --- /dev/null +++ b/test/layers/rotary.jl @@ -0,0 +1,32 @@ +using Flux: with_rotary_position_embedding + +@testset "Rotary Position Embedding Tests" begin + Random.seed!(123) + test_sizes = [(2,2), (4,6), (8,10)] + + for (n, d) in test_sizes + x = randn(n, d) + test_gradients( + with_rotary_position_embedding, + x; + rtol=1e-4, + atol=1e-4, + test_gpu=false, + compare_finite_diff=true, + loss=(f, x) -> sum(f(x)) + ) + end + + # Edge cases + test_gradients( + with_rotary_position_embedding, + zeros(4, 6); + loss=(f, x) -> sum(f(x)) + ) + + test_gradients( + with_rotary_position_embedding, + ones(4, 6); + loss=(f, x) -> sum(f(x)) + ) +end diff --git a/test/runtests.jl b/test/runtests.jl index 6f5a2e7d84..983ee67eab 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -55,6 +55,7 @@ Random.seed!(0) include("layers/upsample.jl") include("layers/show.jl") include("layers/macro.jl") + include("layers/rotary.jl") end @testset "outputsize" begin