Embedding Layer #205

Merged · 15 commits · Mar 5, 2025
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -43,6 +43,8 @@ add_library(neural-fortran
src/nf/nf_layer_submodule.f90
src/nf/nf_linear2d_layer.f90
src/nf/nf_linear2d_layer_submodule.f90
src/nf/nf_embedding_layer.f90
src/nf/nf_embedding_layer_submodule.f90
src/nf/nf_loss.f90
src/nf/nf_loss_submodule.f90
src/nf/nf_maxpool2d_layer.f90
1 change: 1 addition & 0 deletions README.md
@@ -30,6 +30,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
| Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass |
|------------|------------------|------------------------|----------------------|--------------|---------------|
| Input | `input` | n/a | 1, 2, 3 | n/a | n/a |
| Embedding | `embedding` | n/a | 2 | ✅ | ✅ |
| Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ |
| Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
5 changes: 3 additions & 2 deletions src/nf.f90
@@ -6,13 +6,14 @@ module nf
conv2d, &
dense, &
dropout, &
embedding, &
flatten, &
input, &
layernorm, &
linear2d, &
maxpool2d, &
reshape, &
self_attention, &
layernorm
self_attention
use nf_loss, only: mse, quadratic
use nf_metrics, only: corr, maxabs
use nf_network, only: network
98 changes: 98 additions & 0 deletions src/nf/nf_embedding_layer.f90
@@ -0,0 +1,98 @@
module nf_embedding_layer

use nf_activation, only: activation_function
use nf_base_layer, only: base_layer

implicit none

private
public :: embedding_layer

type, extends(base_layer) :: embedding_layer
!! Embedding layer
!! A trainable lookup table for token embeddings. Inputs are
!! integer indices into a dictionary of size `vocab_size`;
!! the layer converts them into an output table of shape
!! (`sequence_length`, `model_dimension`).
integer :: sequence_length, vocab_size, model_dimension
integer :: positional

real, allocatable :: weights(:, :)
real, allocatable :: output(:, :)
real, allocatable :: dw(:, :) ! weight gradients

contains

procedure :: backward
procedure :: forward
procedure :: positional_trigonometric
procedure :: positional_absolute
procedure :: init
procedure :: get_num_params
procedure :: get_params
procedure :: get_gradients
procedure :: set_params

end type embedding_layer

interface embedding_layer
module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res)
integer, intent(in) :: vocab_size, model_dimension
integer, optional :: positional
type(embedding_layer) :: res
end function embedding_layer_cons
end interface embedding_layer

interface
pure module subroutine forward(self, input)
!! Look up embedding vectors by their indices in the dictionary
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: input(:)
end subroutine forward

pure module subroutine backward(self, input, gradient)
!! Accumulate the weight gradients at the `input` indices:
!! dw_i = dw_i + d_output_i
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: input(:)
real, intent(in) :: gradient(:, :)
end subroutine backward

pure module subroutine positional_trigonometric(self, pos)
!! Add trigonometric (sinusoidal, non-trainable) positional information to the embedding
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: pos
end subroutine positional_trigonometric

pure module subroutine positional_absolute(self, pos)
!! Add the absolute (zero-based) position to the embedding
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: pos
end subroutine positional_absolute

module subroutine init(self, input_shape)
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: input_shape(:)
end subroutine init

pure module function get_num_params(self) result(num_params)
class(embedding_layer), intent(in) :: self
integer :: num_params
end function get_num_params

module function get_params(self) result(params)
class(embedding_layer), intent(in), target :: self
real, allocatable :: params(:)
end function get_params

module function get_gradients(self) result(gradients)
class(embedding_layer), intent(in), target :: self
real, allocatable :: gradients(:)
end function get_gradients

module subroutine set_params(self, params)
class(embedding_layer), intent(in out) :: self
real, intent(in), target :: params(:)
end subroutine set_params
end interface
end module nf_embedding_layer
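As a quick orientation, here is a minimal sketch (not part of this PR) of exercising the `embedding_layer` type directly; the vocabulary size, embedding dimension, and token indices below are made-up values.

```fortran
program embedding_layer_demo
  ! Minimal sketch, not from this PR: use embedding_layer on its own.
  use nf_embedding_layer, only: embedding_layer
  implicit none

  type(embedding_layer) :: emb
  integer :: tokens(4)

  ! 10-entry vocabulary, 8-dimensional embeddings, no positional encoding
  emb = embedding_layer(vocab_size=10, model_dimension=8)
  call emb % init([4])              ! sequence length of 4

  tokens = [3, 7, 7, 1]             ! integer indices into the vocabulary
  call emb % forward(tokens)

  print *, shape(emb % output)      ! 4 8, i.e. (sequence_length, model_dimension)
  print *, emb % get_num_params()   ! 80, i.e. vocab_size * model_dimension
end program embedding_layer_demo
```

Each row of `output` is the weight row selected by the corresponding token index, so the layer behaves exactly as the lookup table described in the type docstring.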
137 changes: 137 additions & 0 deletions src/nf/nf_embedding_layer_submodule.f90
@@ -0,0 +1,137 @@
#define NONE 0
#define TRIGONOMETRIC 1
#define ABSOLUTE 2

submodule(nf_embedding_layer) nf_embedding_layer_submodule
use nf_base_layer, only: base_layer
implicit none
contains
module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res)
integer, intent(in) :: vocab_size, model_dimension
integer, optional :: positional
type(embedding_layer) :: res

res % vocab_size = vocab_size
res % model_dimension = model_dimension
if (.not. present(positional)) then
res % positional = NONE
else
res % positional = positional
end if
end function embedding_layer_cons

module subroutine init(self, input_shape)
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: input_shape(:)

self % sequence_length = input_shape(1)

allocate(self % output(self % sequence_length, self % model_dimension))

allocate(self % weights(self % vocab_size, self % model_dimension))
self % weights = 0.1

allocate(self % dw(self % vocab_size, self % model_dimension))
self % dw = 0.0
end subroutine init

pure module subroutine forward(self, input)
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: input(:)
integer :: i, index

do concurrent(i = 1: self % sequence_length)
! clamp out-of-range and zero indices to the first vocabulary entry
index = input(i)
if (index > size(self % weights, 1) .or. index == 0) index = 1

self % output(i, :) = self % weights(index, :)

if (self % positional == TRIGONOMETRIC) then
call self % positional_trigonometric(i)
elseif (self % positional == ABSOLUTE) then
call self % positional_absolute(i)
end if
end do
end subroutine forward

pure module subroutine backward(self, input, gradient)
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: input(:)
real, intent(in) :: gradient(:, :)
integer :: i

do concurrent(i = 1: self % sequence_length)
self % dw(input(i), :) = self % dw(input(i), :) + gradient(i, :)
end do
end subroutine backward

pure module subroutine positional_trigonometric(self, pos)
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: pos
integer :: i
real :: theta

do concurrent(i = 1: floor(real(self % model_dimension) / 2))
theta = (pos - 1) / 10000 ** (real(2 * (i-1)) / self % model_dimension)
self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta)
self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta)
end do
end subroutine positional_trigonometric

pure module subroutine positional_absolute(self, pos)
class(embedding_layer), intent(in out) :: self
integer, intent(in) :: pos
integer :: i

do concurrent(i = 1: self % model_dimension)
self % output(pos, i) = self % output(pos, i) + pos - 1
end do
end subroutine positional_absolute

pure module function get_num_params(self) result(num_params)
class(embedding_layer), intent(in) :: self
integer :: num_params
num_params = self % vocab_size * self % model_dimension
end function get_num_params

module function get_params(self) result(params)
class(embedding_layer), intent(in), target :: self
real, allocatable :: params(:)
real, pointer :: w_(:) => null()

w_(1: product(shape(self % weights))) => self % weights
params = w_
end function get_params

module function get_gradients(self) result(gradients)
class(embedding_layer), intent(in), target :: self
real, allocatable :: gradients(:)
real, pointer :: dw_(:) => null()

dw_(1: product(shape(self % dw))) => self % dw
gradients = dw_
end function get_gradients

module subroutine set_params(self, params)
class(embedding_layer), intent(in out) :: self
real, intent(in), target :: params(:)

real, pointer :: p_(:,:) => null()

! check if the number of parameters is correct
if (size(params) /= self % get_num_params()) then
error stop 'Error: number of parameters does not match'
end if

associate(n => self % vocab_size * self % model_dimension)
! reshape the weights
p_(1:self % vocab_size, 1:self % model_dimension) => params(1 : n)
self % weights = p_
end associate

end subroutine set_params
end submodule nf_embedding_layer_submodule
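For reference, the trigonometric branch above implements the familiar sinusoidal positional encoding, written here with the one-based position `pos` and pair index `i` used in the code:

```math
\theta_{pos,\,i} = \frac{pos - 1}{10000^{\,2(i-1)/d_{\mathrm{model}}}},
\qquad i = 1, \dots, \left\lfloor d_{\mathrm{model}} / 2 \right\rfloor
```

with `sin(theta)` added to output column `2*i - 1` and `cos(theta)` added to column `2*i`. The absolute variant instead adds the zero-based position `pos - 1` to every component of the embedding vector.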
18 changes: 18 additions & 0 deletions src/nf/nf_layer_constructors.f90
@@ -18,6 +18,7 @@ module nf_layer_constructors
maxpool2d, &
reshape, &
self_attention, &
embedding, &
layernorm

interface input
@@ -233,6 +234,23 @@ module function self_attention(num_heads) result(res)
!! Resulting layer instance
end function self_attention

module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res)
!! Embedding layer constructor.
!!
!! This layer feeds token indices from a dictionary into the network,
!! acting as a trainable lookup table that converts each index into a vector.
!! An embedding layer must be the first layer of a network.
integer, intent(in) :: sequence_length
!! Maximum length of the input sequence
integer, intent(in) :: vocab_size
!! Size of the token vocabulary
integer, intent(in) :: model_dimension
!! Dimension of the embedding vectors
integer, optional, intent(in) :: positional
!! Type of positional encoding to add: 1 for trigonometric, 2 for absolute; none if omitted
type(layer) :: res
end function embedding

module function layernorm() result(res)
!! Layer Normalization
!! (x − mean(x)) / sqrt(variance(x) + eps) * gamma + beta
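A usage sketch, not taken from this PR: it assumes the usual `network([...])` constructor and that a rank-2 layer such as `linear2d` accepts `embedding` as an input layer (the compatibility table is only partially visible in this diff); all sizes are arbitrary.

```fortran
program embedding_network_demo
  ! Hypothetical sketch: the new embedding constructor as the first layer.
  use nf, only: network, embedding, linear2d
  implicit none

  type(network) :: net

  ! 64-token sequences over a 1000-entry vocabulary, embedded into 128
  ! dimensions; positional=1 selects the trigonometric encoding and 2 the
  ! absolute encoding, per the defines in the embedding submodule.
  net = network([ &
    embedding(sequence_length=64, vocab_size=1000, model_dimension=128, positional=1), &
    linear2d(out_features=32) &
  ])
end program embedding_network_demo
```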
21 changes: 20 additions & 1 deletion src/nf/nf_layer_constructors_submodule.f90
@@ -12,6 +12,7 @@
use nf_reshape_layer, only: reshape3d_layer
use nf_linear2d_layer, only: linear2d_layer
use nf_self_attention_layer, only: self_attention_layer
use nf_embedding_layer, only: embedding_layer
use nf_layernorm_layer, only: layernorm_layer
use nf_activation, only: activation_function, relu, sigmoid

@@ -172,6 +173,7 @@ module function linear2d(out_features) result(res)

end function linear2d


module function self_attention(num_heads) result(res)
integer, intent(in) :: num_heads
type(layer) :: res
@@ -180,9 +182,26 @@ module function self_attention(num_heads) result(res)
allocate(res % p, source=self_attention_layer(num_heads))
end function self_attention

module function layernorm() result(res)

module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res)
integer, intent(in) :: sequence_length, vocab_size, model_dimension
integer, optional, intent(in) :: positional
type(layer) :: res
type(embedding_layer) :: embedding_layer_instance

embedding_layer_instance = embedding_layer(vocab_size, model_dimension, positional)
call embedding_layer_instance % init([sequence_length])
res % name = 'embedding'
res % layer_shape = [sequence_length, model_dimension]
res % input_layer_shape = [integer ::]
allocate(res % p, source=embedding_layer_instance)
res % initialized = .true.

end function embedding


module function layernorm() result(res)
type(layer) :: res
res % name = 'layernorm'
allocate(res % p, source=layernorm_layer())
end function layernorm