From 1c54cf0af2e4f866e53df574675b291f901fc8f7 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 11:53:14 +0400 Subject: [PATCH 01/14] embedding_layer: initial forward implementation --- src/nf/nf_embedding_layer.f90 | 77 ++++++++++++++++++++++++ src/nf/nf_embedding_submodule.f90 | 97 +++++++++++++++++++++++++++++++ test/test_embedding_layer.f90 | 14 +++++ 3 files changed, 188 insertions(+) create mode 100644 src/nf/nf_embedding_layer.f90 create mode 100644 src/nf/nf_embedding_submodule.f90 create mode 100644 test/test_embedding_layer.f90 diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 new file mode 100644 index 00000000..9074ece8 --- /dev/null +++ b/src/nf/nf_embedding_layer.f90 @@ -0,0 +1,77 @@ +module nf_embedding_layer + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: embedding_layer + + type, extends(base_layer) :: embedding_layer + integer :: sequence_length, vocab_size, model_dimension + + real, allocatable :: weights(:, :) + real, allocatable :: output(:, :) + real, allocatable :: gradient(:, :) ! input gradient + real, allocatable :: dw(:, :) ! weight gradients + + contains + + procedure :: backward + procedure :: forward + procedure :: init + procedure :: get_num_params + procedure :: get_params + procedure :: get_gradients + procedure :: set_params + + end type embedding_layer + + interface embedding_layer + module function embedding_layer_cons(& + sequence_length, vocab_size, model_dimension& + ) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(embedding_layer) :: res + end function embedding_layer_cons + end interface embedding_layer + + interface + pure module subroutine forward(self, input) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + real, intent(in) :: gradient(:) + end subroutine backward + + module subroutine init(self, input_shape) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module function get_num_params(self) result(num_params) + class(embedding_layer), intent(in) :: self + integer :: num_params + end function get_num_params + + module function get_params(self) result(params) + class(embedding_layer), intent(in), target :: self + real, allocatable :: params(:) + end function get_params + + module function get_gradients(self) result(gradients) + class(embedding_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients + + module subroutine set_params(self, params) + class(embedding_layer), intent(in out) :: self + real, intent(in), target :: params(:) + end subroutine set_params + end interface +end module nf_embedding_layer diff --git a/src/nf/nf_embedding_submodule.f90 b/src/nf/nf_embedding_submodule.f90 new file mode 100644 index 00000000..2d360107 --- /dev/null +++ b/src/nf/nf_embedding_submodule.f90 @@ -0,0 +1,97 @@ +submodule(nf_embedding_layer) nf_embedding_layer_submodule + use nf_base_layer, only: base_layer + implicit none +contains + module function embedding_layer_cons(& + sequence_length, vocab_size, model_dimension& + ) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(embedding_layer) :: res + + res % vocab_size = vocab_size + res % 
model_dimension = model_dimension + res % sequence_length = sequence_length + end function embedding_layer_cons + + module subroutine init(self, input_shape) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + allocate(self % output(self % sequence_length, self % model_dimension)) + allocate(self % gradient(self % sequence_length, self % vocab_size)) + + allocate(self % weights(self % vocab_size, self % model_dimension)) + self % weights = 0.1 + + allocate(self % dw(self % vocab_size, self % model_dimension)) + self % dw = 0.0 + end subroutine init + + pure module subroutine forward(self, input) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + integer :: i + + do concurrent(i = 1: self % sequence_length) + self % output(i, :) = self % weights(input(i), :) + end do + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + real, intent(in) :: gradient(:) + real :: db(self % model_dimension) + real :: dw(self % vocab_size, self % model_dimension) + integer :: i + end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(embedding_layer), intent(in) :: self + integer :: num_params + + ! Number of weigths times number of biases + num_params = self % vocab_size * self % model_dimension + self % model_dimension + + end function get_num_params + + + module function get_params(self) result(params) + class(embedding_layer), intent(in), target :: self + real, allocatable :: params(:) + real, pointer :: w_(:) => null() + + w_(1: product(shape(self % weights))) => self % weights + params = [w_] + end function get_params + + + module function get_gradients(self) result(gradients) + class(embedding_layer), intent(in), target :: self + real, allocatable :: gradients(:) + real, pointer :: dw_(:) => null() + + dw_(1: product(shape(self % dw))) => self % dw + gradients = [dw_] + end function get_gradients + + + module subroutine set_params(self, params) + class(embedding_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + real, pointer :: p_(:,:) => null() + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + associate(n => self % vocab_size * self % model_dimension) + ! reshape the weights + p_(1:self % vocab_size, 1:self % model_dimension) => params(1 : n) + self % weights = p_ + end associate + + end subroutine set_params +end submodule nf_embedding_layer_submodule diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 new file mode 100644 index 00000000..1f0692e0 --- /dev/null +++ b/test/test_embedding_layer.f90 @@ -0,0 +1,14 @@ +program test_embedding_layer + use iso_fortran_env, only: stderr => error_unit + use nf_embedding_layer, only: embedding_layer + implicit none + + logical :: ok = .true. 
+ integer :: sample_input(3) = [2, 1, 3] + type(embedding_layer) :: embedding + + embedding = embedding_layer(sequence_length=3, vocab_size=4, model_dimension=2) + call embedding % init([0]) + embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) + call embedding % forward(sample_input) +end program test_embedding_layer \ No newline at end of file From d4731a1fe930510e7b7576b7fc27aba016d091aa Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 17:12:46 +0400 Subject: [PATCH 02/14] embedding_layer: implementation of embedding layer --- src/nf/nf_embedding_layer.f90 | 16 ++++++++++----- src/nf/nf_embedding_submodule.f90 | 33 +++++++++++++++---------------- test/test_embedding_layer.f90 | 32 +++++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index 9074ece8..0cb4f923 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -9,6 +9,11 @@ module nf_embedding_layer public :: embedding_layer type, extends(base_layer) :: embedding_layer + !! Embedding Layer + !! Stores inputs as a trainable lookup table. Inputs are + !! integer indicies in a dictionary of `vocab_size`. + !! This layer converts them into a table of shape + !! (`sequence_length`, `model_dimension`) integer :: sequence_length, vocab_size, model_dimension real, allocatable :: weights(:, :) @@ -29,24 +34,25 @@ module nf_embedding_layer end type embedding_layer interface embedding_layer - module function embedding_layer_cons(& - sequence_length, vocab_size, model_dimension& - ) result(res) - integer, intent(in) :: sequence_length, vocab_size, model_dimension + module function embedding_layer_cons(vocab_size, model_dimension) result(res) + integer, intent(in) :: vocab_size, model_dimension type(embedding_layer) :: res end function embedding_layer_cons end interface embedding_layer interface pure module subroutine forward(self, input) + !! Get vectors by indicis in the dictionary class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) end subroutine forward pure module subroutine backward(self, input, gradient) + !! Update gradient at `input` indices + !! 
dw_i = W_i + d_output_i class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) - real, intent(in) :: gradient(:) + real, intent(in) :: gradient(:, :) end subroutine backward module subroutine init(self, input_shape) diff --git a/src/nf/nf_embedding_submodule.f90 b/src/nf/nf_embedding_submodule.f90 index 2d360107..fd7d6bc6 100644 --- a/src/nf/nf_embedding_submodule.f90 +++ b/src/nf/nf_embedding_submodule.f90 @@ -2,21 +2,20 @@ use nf_base_layer, only: base_layer implicit none contains - module function embedding_layer_cons(& - sequence_length, vocab_size, model_dimension& - ) result(res) - integer, intent(in) :: sequence_length, vocab_size, model_dimension + module function embedding_layer_cons(vocab_size, model_dimension) result(res) + integer, intent(in) :: vocab_size, model_dimension type(embedding_layer) :: res res % vocab_size = vocab_size res % model_dimension = model_dimension - res % sequence_length = sequence_length end function embedding_layer_cons module subroutine init(self, input_shape) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) + self % sequence_length = input_shape(1) + allocate(self % output(self % sequence_length, self % model_dimension)) allocate(self % gradient(self % sequence_length, self % vocab_size)) @@ -30,32 +29,34 @@ end subroutine init pure module subroutine forward(self, input) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) - integer :: i + integer :: i, index do concurrent(i = 1: self % sequence_length) - self % output(i, :) = self % weights(input(i), :) + index = input(i) + if (index > size(self % weights, 1)) then + index = 1 + end if + self % output(i, :) = self % weights(index, :) end do end subroutine forward pure module subroutine backward(self, input, gradient) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) - real, intent(in) :: gradient(:) - real :: db(self % model_dimension) - real :: dw(self % vocab_size, self % model_dimension) + real, intent(in) :: gradient(:, :) integer :: i + + do concurrent(i = 1: self % sequence_length) + self % dw(input(i), :) = self % dw(input(i), :) + gradient(i, :) + end do end subroutine backward pure module function get_num_params(self) result(num_params) class(embedding_layer), intent(in) :: self integer :: num_params - - ! Number of weigths times number of biases - num_params = self % vocab_size * self % model_dimension + self % model_dimension - + num_params = self % vocab_size * self % model_dimension end function get_num_params - module function get_params(self) result(params) class(embedding_layer), intent(in), target :: self real, allocatable :: params(:) @@ -65,7 +66,6 @@ module function get_params(self) result(params) params = [w_] end function get_params - module function get_gradients(self) result(gradients) class(embedding_layer), intent(in), target :: self real, allocatable :: gradients(:) @@ -75,7 +75,6 @@ module function get_gradients(self) result(gradients) gradients = [dw_] end function get_gradients - module subroutine set_params(self, params) class(embedding_layer), intent(in out) :: self real, intent(in), target :: params(:) diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 1f0692e0..205ce3ca 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -5,10 +5,36 @@ program test_embedding_layer logical :: ok = .true. 
integer :: sample_input(3) = [2, 1, 3] + real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) + real :: output_flat(6) + real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) + real :: dw_flat(8) + real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8]) type(embedding_layer) :: embedding - embedding = embedding_layer(sequence_length=3, vocab_size=4, model_dimension=2) - call embedding % init([0]) + embedding = embedding_layer(vocab_size=4, model_dimension=2) + call embedding % init([3]) embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) + call embedding % forward(sample_input) -end program test_embedding_layer \ No newline at end of file + + output_flat = reshape(embedding % output, [6]) + if (.not. all(output_flat.eq.expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. failed' + end if + + call embedding % backward(sample_input, sample_gradient) + dw_flat = reshape(embedding % dw, shape(dw_flat)) + if (.not. all(dw_flat.eq.expected_dw_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw values.. failed' + end if + + if (ok) then + print '(a)', 'test_embedding_layer: All tests passed.' + else + write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.' + stop 1 + end if +end program test_embedding_layer From e6b54de5cfe1b6bd547af9737d2523cfebc2d258 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 19 Feb 2025 23:43:29 +0400 Subject: [PATCH 03/14] embedding_layer: remove gradient attribute --- src/nf/nf_embedding_layer.f90 | 1 - ..._embedding_submodule.f90 => nf_embedding_layer_submodule.f90} | 1 - 2 files changed, 2 deletions(-) rename src/nf/{nf_embedding_submodule.f90 => nf_embedding_layer_submodule.f90} (97%) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index 0cb4f923..e4f3e10e 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -18,7 +18,6 @@ module nf_embedding_layer real, allocatable :: weights(:, :) real, allocatable :: output(:, :) - real, allocatable :: gradient(:, :) ! input gradient real, allocatable :: dw(:, :) ! 
weight gradients contains diff --git a/src/nf/nf_embedding_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 similarity index 97% rename from src/nf/nf_embedding_submodule.f90 rename to src/nf/nf_embedding_layer_submodule.f90 index fd7d6bc6..5ae5421a 100644 --- a/src/nf/nf_embedding_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -17,7 +17,6 @@ module subroutine init(self, input_shape) self % sequence_length = input_shape(1) allocate(self % output(self % sequence_length, self % model_dimension)) - allocate(self % gradient(self % sequence_length, self % vocab_size)) allocate(self % weights(self % vocab_size, self % model_dimension)) self % weights = 0.1 From 48efd075cfa65ca7eea4a997984bc526f4d749b8 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 01:00:29 +0400 Subject: [PATCH 04/14] embedding_layer: guard against zeros --- src/nf/nf_embedding_layer_submodule.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index 5ae5421a..5e38f22e 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -34,6 +34,8 @@ pure module subroutine forward(self, input) index = input(i) if (index > size(self % weights, 1)) then index = 1 + elseif (index == 0) then + index = 1 end if self % output(i, :) = self % weights(index, :) end do From 4cdd2e52580373ece52d57bd7c9c681dc3345a08 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 01:01:00 +0400 Subject: [PATCH 05/14] embedding_layer: plumbing --- src/nf.f90 | 3 ++- src/nf/nf_layer_constructors.f90 | 26 +++++++++++++--------- src/nf/nf_layer_constructors_submodule.f90 | 18 +++++++++++++++ src/nf/nf_layer_submodule.f90 | 21 +++++++++++++++++ src/nf/nf_network_submodule.f90 | 10 ++++++--- 5 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/nf.f90 b/src/nf.f90 index 39f67ea3..be392154 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -11,7 +11,8 @@ module nf linear2d, & maxpool2d, & reshape, & - self_attention + self_attention, & + embedding use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index db60cf0f..fb99f502 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -17,7 +17,8 @@ module nf_layer_constructors linear2d, & maxpool2d, & reshape, & - self_attention + self_attention, & + embedding interface input @@ -222,15 +223,20 @@ module function linear2d(out_features) result(res) !! Resulting layer instance end function linear2d - module function self_attention(num_heads) result(res) - !! Rank-2 (sequence_length, out_features) self attention constructor. - !! sequence_length and model_dimension are determined at layer initialization, based on the - !! output shape of the previous layer. - integer, intent(in) :: num_heads - !! Number of attention heads - type(layer) :: res - !! Resulting layer instance - end function self_attention + module function self_attention(num_heads) result(res) + !! Rank-2 (sequence_length, out_features) self attention constructor. + !! sequence_length and model_dimension are determined at layer initialization, based on the + !! output shape of the previous layer. + integer, intent(in) :: num_heads + !! Number of attention heads + type(layer) :: res + !! 
Resulting layer instance + end function self_attention + + module function embedding(sequence_length, vocab_size, model_dimension) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(layer) :: res + end function embedding end interface diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 9e5322c1..a10f4c81 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -12,6 +12,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer use nf_activation, only: activation_function, relu, sigmoid implicit none @@ -171,6 +172,7 @@ module function linear2d(out_features) result(res) end function linear2d + module function self_attention(num_heads) result(res) integer, intent(in) :: num_heads type(layer) :: res @@ -179,4 +181,20 @@ module function self_attention(num_heads) result(res) allocate(res % p, source=self_attention_layer(num_heads)) end function self_attention + + module function embedding(sequence_length, vocab_size, model_dimension) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(layer) :: res + type(embedding_layer) :: embedding_layer_instance + + embedding_layer_instance = embedding_layer(vocab_size, model_dimension) + call embedding_layer_instance % init([sequence_length]) + res % name = 'embedding' + res % layer_shape = [sequence_length, model_dimension] + res % input_layer_shape = [integer ::] + allocate(res % p, source=embedding_layer_instance) + res % initialized = .true. + + end function embedding + end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ecdeb41d..a11fb66d 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -12,6 +12,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer use nf_optimizers, only: optimizer_base_type contains @@ -60,6 +61,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -80,6 +83,8 @@ pure module subroutine backward_2d(self, previous, gradient) select type(prev_layer => previous % p) type is(input2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) @@ -91,6 +96,8 @@ pure module subroutine backward_2d(self, previous, gradient) select type(prev_layer => previous % p) type is(input2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) @@ -254,6 +261,8 @@ module subroutine forward(self, input) select type(prev_layer => input % p) type is(input2d_layer) call this_layer % forward(prev_layer % output) + type 
is(embedding_layer) + call this_layer % forward(prev_layer % output) type is(linear2d_layer) call this_layer % forward(prev_layer % output) type is(self_attention_layer) @@ -266,6 +275,8 @@ module subroutine forward(self, input) select type(prev_layer => input % p) type is(input2d_layer) call this_layer % forward(prev_layer % output) + type is(embedding_layer) + call this_layer % forward(prev_layer % output) type is(linear2d_layer) call this_layer % forward(prev_layer % output) type is(self_attention_layer) @@ -307,6 +318,8 @@ pure module subroutine get_output_2d(self, output) type is(input2d_layer) allocate(output, source=this_layer % output) + type is(embedding_layer) + allocate(output, source=this_layer % output) type is(linear2d_layer) allocate(output, source=this_layer % output) type is(self_attention_layer) @@ -425,6 +438,8 @@ elemental module function get_num_params(self) result(num_params) num_params = this_layer % get_num_params() type is (self_attention_layer) num_params = this_layer % get_num_params() + type is (embedding_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -458,6 +473,8 @@ module function get_params(self) result(params) params = this_layer % get_params() type is (self_attention_layer) params = this_layer % get_params() + type is (embedding_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -491,6 +508,8 @@ module function get_gradients(self) result(gradients) gradients = this_layer % get_gradients() type is (self_attention_layer) gradients = this_layer % get_gradients() + type is (embedding_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -548,6 +567,8 @@ module subroutine set_params(self, params) type is (self_attention_layer) call this_layer % set_params(params) + type is (embedding_layer) + call this_layer % set_params(params) type is (maxpool2d_layer) ! No parameters to set. diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index f344c5c5..5cd79f5a 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -11,6 +11,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer use nf_layer, only: layer use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape use nf_loss, only: quadratic @@ -46,7 +47,7 @@ module function network_from_layers(layers) result(res) error stop 'Error: A network must have at least 2 layers.' ! The first layer must be an input layer - if (.not. layers(1) % name == 'input') & + if (.not. layers(1) % name == 'input' .and. .not. layers(1) % name == 'embedding') & error stop 'Error: First layer in the network must be an input layer.' !TODO Ensure that the layers are in allowed sequence: @@ -207,8 +208,11 @@ module subroutine forward_1d(self, input) integer :: n ! 
Set the input array into the input layer - select type(input_layer => self % layers(1) % p); type is(input1d_layer) - call input_layer % set(input) + select type(input_layer => self % layers(1) % p) + type is(input1d_layer) + call input_layer % set(input) + type is(embedding_layer) + call input_layer % forward(nint(input)) end select do n = 2, size(self % layers) From 6bfea21c8a1e4ddb02e00875e3f793ecc54e16b7 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:11:50 +0400 Subject: [PATCH 06/14] embedding_layer: positional encoding --- src/nf/nf_embedding_layer.f90 | 10 +++++++++- src/nf/nf_embedding_layer_submodule.f90 | 26 ++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index e4f3e10e..fa2d6076 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -15,6 +15,7 @@ module nf_embedding_layer !! This layer converts them into a table of shape !! (`sequence_length`, `model_dimension`) integer :: sequence_length, vocab_size, model_dimension + logical :: positional real, allocatable :: weights(:, :) real, allocatable :: output(:, :) @@ -24,6 +25,7 @@ module nf_embedding_layer procedure :: backward procedure :: forward + procedure :: positional_encoding procedure :: init procedure :: get_num_params procedure :: get_params @@ -33,8 +35,9 @@ module nf_embedding_layer end type embedding_layer interface embedding_layer - module function embedding_layer_cons(vocab_size, model_dimension) result(res) + module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension + logical, optional :: positional type(embedding_layer) :: res end function embedding_layer_cons end interface embedding_layer @@ -54,6 +57,11 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: gradient(:, :) end subroutine backward + pure module subroutine positional_encoding(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + end subroutine positional_encoding + module subroutine init(self, input_shape) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index 5e38f22e..a81e15e3 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -2,12 +2,18 @@ use nf_base_layer, only: base_layer implicit none contains - module function embedding_layer_cons(vocab_size, model_dimension) result(res) + module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension + logical, optional :: positional type(embedding_layer) :: res res % vocab_size = vocab_size res % model_dimension = model_dimension + if (.not. present(positional)) then + res % positional = .false. 
+ else + res % positional = positional + end if end function embedding_layer_cons module subroutine init(self, input_shape) @@ -37,7 +43,12 @@ pure module subroutine forward(self, input) elseif (index == 0) then index = 1 end if + self % output(i, :) = self % weights(index, :) + + if (self % positional) then + call self % positional_encoding(i) + end if end do end subroutine forward @@ -52,6 +63,19 @@ pure module subroutine backward(self, input, gradient) end do end subroutine backward + pure module subroutine positional_encoding(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + integer :: i + real :: theta + + do concurrent(i = 1: floor(real(self % model_dimension) / 2)) + theta = (pos - 1) / 10000 ** (real(2 * (i-1)) / self % model_dimension) + self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta) + self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta) + end do + end subroutine positional_encoding + pure module function get_num_params(self) result(num_params) class(embedding_layer), intent(in) :: self integer :: num_params From f1b414c155e2af4f6c2cc065418d357122c2afa4 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:13:36 +0400 Subject: [PATCH 07/14] embedding_layer: update tests --- test/test_embedding_layer.f90 | 93 +++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 25 deletions(-) diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 205ce3ca..028df4f7 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -4,32 +4,9 @@ program test_embedding_layer implicit none logical :: ok = .true. - integer :: sample_input(3) = [2, 1, 3] - real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) - real :: output_flat(6) - real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) - real :: dw_flat(8) - real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8]) - type(embedding_layer) :: embedding - - embedding = embedding_layer(vocab_size=4, model_dimension=2) - call embedding % init([3]) - embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) - - call embedding % forward(sample_input) - - output_flat = reshape(embedding % output, [6]) - if (.not. all(output_flat.eq.expected_output_flat)) then - ok = .false. - write(stderr, '(a)') 'forward returned incorrect values.. failed' - end if - call embedding % backward(sample_input, sample_gradient) - dw_flat = reshape(embedding % dw, shape(dw_flat)) - if (.not. all(dw_flat.eq.expected_dw_flat)) then - ok = .false. - write(stderr, '(a)') 'backward returned incorrect dw values.. failed' - end if + call test_simple(ok) + call test_positional(ok) if (ok) then print '(a)', 'test_embedding_layer: All tests passed.' @@ -37,4 +14,70 @@ program test_embedding_layer write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.' 
stop 1 end if + +contains + subroutine test_simple(ok) + logical, intent(in out) :: ok + + integer :: sample_input(3) = [2, 1, 3] + real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) + real :: output_flat(6) + real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) + real :: dw_flat(8) + real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8]) + type(embedding_layer) :: embedding + + embedding = embedding_layer(vocab_size=4, model_dimension=2) + call embedding % init([3]) + embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [6]) + if (.not. all(output_flat.eq.expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. failed' + end if + + call embedding % backward(sample_input, sample_gradient) + dw_flat = reshape(embedding % dw, shape(dw_flat)) + if (.not. all(dw_flat.eq.expected_dw_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw values.. failed' + end if + end subroutine test_simple + + subroutine test_positional(ok) + logical, intent(in out) :: ok + + integer :: sample_input(3) = [2, 1, 3] + real :: output_flat(12) + real :: expected_output_flat(12) = reshape([& + 0.3, 0.941471, 1.4092975,& + 1.3, 0.64030236, 0.08385316,& + 0.3, 0.10999984, 0.51999867,& + 1.3, 1.09995, 1.4998& + ], [12]) + type(embedding_layer) :: embedding + + real :: theta + integer :: i, pos + + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=.true.) + call embedding % init([3]) + embedding % weights = reshape([& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2& + ], [5, 4]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [12]) + if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then + ok = .false. + write(stderr, '(a)') 'positional encoding returned incorrect values.. failed' + end if + end subroutine test_positional end program test_embedding_layer From 10e54d0f0b3f78104567902e17685741f7750ebf Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:19:20 +0400 Subject: [PATCH 08/14] embedding_layer: add more comments --- src/nf/nf_embedding_layer.f90 | 1 + src/nf/nf_layer_constructors.f90 | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index fa2d6076..8c066ceb 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -58,6 +58,7 @@ pure module subroutine backward(self, input, gradient) end subroutine backward pure module subroutine positional_encoding(self, pos) + !! Sum embedding with positional info (trigonometric, not trianable) class(embedding_layer), intent(in out) :: self integer, intent(in) :: pos end subroutine positional_encoding diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index fb99f502..63411eec 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -234,6 +234,14 @@ module function self_attention(num_heads) result(res) end function self_attention module function embedding(sequence_length, vocab_size, model_dimension) result(res) + !! Embedding layer constructor. + !! + !! This layer is for inputting token indices from the dictionary to the network. + !! 
Works as a trainable lookup table that converts each index into a vector. + !! Embedding layer must be the first layer in a network. + !! `sequence_length`: max len of input sequence + !! `vocab_size`: length of token vocabulary + !! `model_dimension`: size of target embeddings integer, intent(in) :: sequence_length, vocab_size, model_dimension type(layer) :: res end function embedding From 0165642b996ef395102337636120919d6ff4d9c6 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:22:46 +0400 Subject: [PATCH 09/14] embedding_layer: update cmake --- CMakeLists.txt | 2 ++ test/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1bf2231..562d2e1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(neural-fortran src/nf/nf_layer_submodule.f90 src/nf/nf_linear2d_layer.f90 src/nf/nf_linear2d_layer_submodule.f90 + src/nf/nf_embedding_layer.f90 + src/nf/nf_embedding_layer_submodule.f90 src/nf/nf_loss.f90 src/nf/nf_loss_submodule.f90 src/nf/nf_maxpool2d_layer.f90 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 741e9930..e7bbacc0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,6 +12,7 @@ foreach(execid insert_flatten reshape_layer multihead_attention_layer + embedding_layer dense_network get_set_network_params conv2d_network From dd0ab319167912200e696ae232c35307efd6ab4d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Fri, 21 Feb 2025 22:07:58 +0400 Subject: [PATCH 10/14] embedding_layer: pr fixes --- src/nf/nf_embedding_layer_submodule.f90 | 4 ++-- test/test_embedding_layer.f90 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index a81e15e3..eedcc2ac 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -88,7 +88,7 @@ module function get_params(self) result(params) real, pointer :: w_(:) => null() w_(1: product(shape(self % weights))) => self % weights - params = [w_] + params = w_ end function get_params module function get_gradients(self) result(gradients) @@ -97,7 +97,7 @@ module function get_gradients(self) result(gradients) real, pointer :: dw_(:) => null() dw_(1: product(shape(self % dw))) => self % dw - gradients = [dw_] + gradients = dw_ end function get_gradients module subroutine set_params(self, params) diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 028df4f7..4a7b47b6 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -12,7 +12,7 @@ program test_embedding_layer print '(a)', 'test_embedding_layer: All tests passed.' else write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.' - stop 1 + error stop 1 end if contains From 074bcd1edd70569082e730edd743570366aca51e Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 23 Feb 2025 11:44:23 +0400 Subject: [PATCH 11/14] embedding_layer: add absolute positional encoding --- src/nf/nf_embedding_layer.f90 | 17 +++++++--- src/nf/nf_embedding_layer_submodule.f90 | 28 +++++++++++---- test/test_embedding_layer.f90 | 45 ++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index 8c066ceb..94a868a5 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -15,7 +15,7 @@ module nf_embedding_layer !! This layer converts them into a table of shape !! 
(`sequence_length`, `model_dimension`) integer :: sequence_length, vocab_size, model_dimension - logical :: positional + integer :: positional real, allocatable :: weights(:, :) real, allocatable :: output(:, :) @@ -25,7 +25,8 @@ module nf_embedding_layer procedure :: backward procedure :: forward - procedure :: positional_encoding + procedure :: positional_trigonometric + procedure :: positional_absolute procedure :: init procedure :: get_num_params procedure :: get_params @@ -37,7 +38,7 @@ module nf_embedding_layer interface embedding_layer module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension - logical, optional :: positional + integer, optional :: positional type(embedding_layer) :: res end function embedding_layer_cons end interface embedding_layer @@ -57,11 +58,17 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: gradient(:, :) end subroutine backward - pure module subroutine positional_encoding(self, pos) + pure module subroutine positional_trigonometric(self, pos) !! Sum embedding with positional info (trigonometric, not trianable) class(embedding_layer), intent(in out) :: self integer, intent(in) :: pos - end subroutine positional_encoding + end subroutine positional_trigonometric + + pure module subroutine positional_absolute(self, pos) + !! Sum embedding with absolute position + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + end subroutine positional_absolute module subroutine init(self, input_shape) class(embedding_layer), intent(in out) :: self diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index eedcc2ac..83992b22 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -1,16 +1,20 @@ +#define NONE 0 +#define TRIGONOMETRIC 1 +#define ABSOLUTE 2 + submodule(nf_embedding_layer) nf_embedding_layer_submodule use nf_base_layer, only: base_layer implicit none contains module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension - logical, optional :: positional + integer, optional :: positional type(embedding_layer) :: res res % vocab_size = vocab_size res % model_dimension = model_dimension if (.not. present(positional)) then - res % positional = .false. 
+ res % positional = NONE else res % positional = positional end if @@ -46,8 +50,10 @@ pure module subroutine forward(self, input) self % output(i, :) = self % weights(index, :) - if (self % positional) then - call self % positional_encoding(i) + if (self % positional == TRIGONOMETRIC) then + call self % positional_trigonometric(i) + elseif (self % positional == ABSOLUTE) then + call self % positional_absolute(i) end if end do end subroutine forward @@ -63,7 +69,7 @@ pure module subroutine backward(self, input, gradient) end do end subroutine backward - pure module subroutine positional_encoding(self, pos) + pure module subroutine positional_trigonometric(self, pos) class(embedding_layer), intent(in out) :: self integer, intent(in) :: pos integer :: i @@ -74,7 +80,17 @@ pure module subroutine positional_encoding(self, pos) self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta) self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta) end do - end subroutine positional_encoding + end subroutine positional_trigonometric + + pure module subroutine positional_absolute(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + integer :: i + + do concurrent(i = 1: self % model_dimension) + self % output(pos, i) = self % output(pos, i) + pos - 1 + end do + end subroutine positional_absolute pure module function get_num_params(self) result(num_params) class(embedding_layer), intent(in) :: self diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 4a7b47b6..8f135db8 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -6,7 +6,8 @@ program test_embedding_layer logical :: ok = .true. call test_simple(ok) - call test_positional(ok) + call test_positional_trigonometric(ok) + call test_positional_absolute(ok) if (ok) then print '(a)', 'test_embedding_layer: All tests passed.' @@ -47,7 +48,7 @@ subroutine test_simple(ok) end if end subroutine test_simple - subroutine test_positional(ok) + subroutine test_positional_trigonometric(ok) logical, intent(in out) :: ok integer :: sample_input(3) = [2, 1, 3] @@ -63,7 +64,7 @@ subroutine test_positional(ok) real :: theta integer :: i, pos - embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=.true.) + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=1) call embedding % init([3]) embedding % weights = reshape([& 0.1, 0.3, 0.5, 0.7, 0.2,& @@ -77,7 +78,41 @@ subroutine test_positional(ok) output_flat = reshape(embedding % output, [12]) if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then ok = .false. - write(stderr, '(a)') 'positional encoding returned incorrect values.. failed' + write(stderr, '(a)') 'trigonometric positional encoding returned incorrect values.. 
failed' end if - end subroutine test_positional + end subroutine test_positional_trigonometric + + subroutine test_positional_absolute(ok) + logical, intent(in out) :: ok + + integer :: sample_input(3) = [2, 1, 3] + real :: output_flat(12) + real :: expected_output_flat(12) = reshape([& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5& + ], [12]) + type(embedding_layer) :: embedding + + real :: theta + integer :: i, pos + + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=2) + call embedding % init([3]) + embedding % weights = reshape([& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2& + ], [5, 4]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [12]) + if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then + ok = .false. + write(stderr, '(a)') 'absolute positional encoding returned incorrect values.. failed' + end if + end subroutine test_positional_absolute end program test_embedding_layer From 73799bd5a4693b6be0e990a3db5b3f80134d6344 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 23 Feb 2025 14:49:32 +0400 Subject: [PATCH 12/14] embedding_layer: update constructor and tests --- src/nf/nf_layer_constructors.f90 | 3 +- src/nf/nf_layer_constructors_submodule.f90 | 5 ++-- test/test_embedding_layer.f90 | 33 ++++++++++++++++------ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 63411eec..0f17cc8d 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -233,7 +233,7 @@ module function self_attention(num_heads) result(res) !! Resulting layer instance end function self_attention - module function embedding(sequence_length, vocab_size, model_dimension) result(res) + module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res) !! Embedding layer constructor. !! !! This layer is for inputting token indices from the dictionary to the network. @@ -243,6 +243,7 @@ module function embedding(sequence_length, vocab_size, model_dimension) result(r !! `vocab_size`: length of token vocabulary !! 
`model_dimension`: size of target embeddings integer, intent(in) :: sequence_length, vocab_size, model_dimension + integer, optional, intent(in) :: positional type(layer) :: res end function embedding diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index a10f4c81..329f7d3d 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -182,12 +182,13 @@ module function self_attention(num_heads) result(res) end function self_attention - module function embedding(sequence_length, vocab_size, model_dimension) result(res) + module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res) integer, intent(in) :: sequence_length, vocab_size, model_dimension + integer, optional, intent(in) :: positional type(layer) :: res type(embedding_layer) :: embedding_layer_instance - embedding_layer_instance = embedding_layer(vocab_size, model_dimension) + embedding_layer_instance = embedding_layer(vocab_size, model_dimension, positional) call embedding_layer_instance % init([sequence_length]) res % name = 'embedding' res % layer_shape = [sequence_length, model_dimension] diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 8f135db8..99b7fca6 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -1,13 +1,16 @@ program test_embedding_layer use iso_fortran_env, only: stderr => error_unit use nf_embedding_layer, only: embedding_layer + use nf_layer, only: layer + use nf_layer_constructors, only: embedding_constructor => embedding implicit none logical :: ok = .true. + integer :: sample_input(3) = [2, 1, 3] - call test_simple(ok) - call test_positional_trigonometric(ok) - call test_positional_absolute(ok) + call test_simple(ok, sample_input) + call test_positional_trigonometric(ok, sample_input) + call test_positional_absolute(ok, sample_input) if (ok) then print '(a)', 'test_embedding_layer: All tests passed.' @@ -17,10 +20,10 @@ program test_embedding_layer end if contains - subroutine test_simple(ok) + subroutine test_simple(ok, sample_input) logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) - integer :: sample_input(3) = [2, 1, 3] real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) real :: output_flat(6) real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) @@ -48,10 +51,10 @@ subroutine test_simple(ok) end if end subroutine test_simple - subroutine test_positional_trigonometric(ok) + subroutine test_positional_trigonometric(ok, sample_input) logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) - integer :: sample_input(3) = [2, 1, 3] real :: output_flat(12) real :: expected_output_flat(12) = reshape([& 0.3, 0.941471, 1.4092975,& @@ -82,10 +85,10 @@ subroutine test_positional_trigonometric(ok) end if end subroutine test_positional_trigonometric - subroutine test_positional_absolute(ok) + subroutine test_positional_absolute(ok, sample_input) logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) - integer :: sample_input(3) = [2, 1, 3] real :: output_flat(12) real :: expected_output_flat(12) = reshape([& 0.3, 1.1, 2.5,& @@ -115,4 +118,16 @@ subroutine test_positional_absolute(ok) write(stderr, '(a)') 'absolute positional encoding returned incorrect values.. 
failed' end if end subroutine test_positional_absolute + + subroutine test_embedding_constructor(ok, sample_input) + logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) + + type(layer) :: embedding_constructed + + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=0) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=1) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=2) + end subroutine test_embedding_constructor end program test_embedding_layer From fe02beb724e7b400dcc59ed341031b1499db421b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 23 Feb 2025 21:12:10 +0400 Subject: [PATCH 13/14] embedding_layer: make integer input generics --- src/nf/nf_network.f90 | 19 +++++++++++++-- src/nf/nf_network_submodule.f90 | 42 +++++++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index 5916924e..53d3c07d 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -32,17 +32,19 @@ module nf_network procedure, private :: evaluate_batch_1d procedure, private :: forward_1d + procedure, private :: forward_1d_int procedure, private :: forward_2d procedure, private :: forward_3d procedure, private :: predict_1d + procedure, private :: predict_1d_int procedure, private :: predict_2d procedure, private :: predict_3d procedure, private :: predict_batch_1d procedure, private :: predict_batch_3d generic :: evaluate => evaluate_batch_1d - generic :: forward => forward_1d, forward_2d, forward_3d - generic :: predict => predict_1d, predict_2d, predict_3d + generic :: forward => forward_1d, forward_1d_int, forward_2d, forward_3d + generic :: predict => predict_1d, predict_1d_int, predict_2d, predict_3d generic :: predict_batch => predict_batch_1d, predict_batch_3d end type network @@ -95,6 +97,12 @@ module subroutine forward_1d(self, input) !! 1-d input data end subroutine forward_1d + module subroutine forward_1d_int(self, input) + !! Same as `forward_1d` except `integer` + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + end subroutine forward_1d_int + module subroutine forward_2d(self, input) !! Apply a forward pass through the network. !! @@ -137,6 +145,13 @@ module function predict_1d(self, input) result(res) !! Output of the network end function predict_1d + module function predict_1d_int(self, input) result(res) + !! Same as `predict_1d` except `integer` + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + real, allocatable :: res(:) + end function predict_1d_int + module function predict_2d(self, input) result(res) !! Return the output of the network given the input 1-d array. 
class(network), intent(in out) :: self diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 5cd79f5a..5816db12 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -211,8 +211,6 @@ module subroutine forward_1d(self, input) select type(input_layer => self % layers(1) % p) type is(input1d_layer) call input_layer % set(input) - type is(embedding_layer) - call input_layer % forward(nint(input)) end select do n = 2, size(self % layers) @@ -221,6 +219,21 @@ module subroutine forward_1d(self, input) end subroutine forward_1d + module subroutine forward_1d_int(self, input) + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + integer :: n + + select type(input_layer => self % layers(1) % p) + type is(embedding_layer) + call input_layer % forward(input) + end select + + do n = 2, size(self % layers) + call self % layers(n) % forward(self % layers(n - 1)) + end do + + end subroutine forward_1d_int module subroutine forward_2d(self, input) class(network), intent(in out) :: self @@ -285,6 +298,31 @@ module function predict_1d(self, input) result(res) end function predict_1d + module function predict_1d_int(self, input) result(res) + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + real, allocatable :: res(:) + integer :: n, num_layers + + num_layers = size(self % layers) + + call self % set_training_mode(.false.) + call self % forward(input) + call self % set_training_mode(.true.) + + select type(output_layer => self % layers(num_layers) % p) + type is(dense_layer) + res = output_layer % output + type is(dropout_layer) + res = output_layer % output + type is(flatten_layer) + res = output_layer % output + class default + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' + end select + + end function predict_1d_int module function predict_2d(self, input) result(res) class(network), intent(in out) :: self From e97be10a73d2d98a8c7bd790991cbee998ca7a79 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Tue, 4 Mar 2025 14:21:25 +0400 Subject: [PATCH 14/14] embedding_layer: update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9fe3fab0..e94296a3 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). | Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass | |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 2, 3 | n/a | n/a | +| Embedding | `embedding` | n/a | 2 | ✅ | ✅ | | Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ | | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ | | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
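
A minimal usage sketch, assuming the series above is applied as-is: the `embedding`
constructor signature (`sequence_length`, `vocab_size`, `model_dimension`, optional
`positional`) is taken from `nf_layer_constructors.f90`, and the integer overload of
`predict` from `nf_network.f90`; `positional=1` selects the trigonometric encoding
(theta = (pos - 1) / 10000 ** (2*(i-1)/model_dimension), as implemented in
`nf_embedding_layer_submodule.f90`) and `positional=2` the absolute one. The
downstream layers (`flatten`, `dense`), the layer sizes, and the token values are
illustrative placeholders, not a configuration exercised by these patches.

    program embedding_usage_sketch
      use nf, only: network, embedding, flatten, dense
      implicit none

      type(network) :: net
      integer :: tokens(5)
      real, allocatable :: prediction(:)

      ! Token indices into a vocabulary of 100 entries; indices of 0 or
      ! beyond vocab_size are clamped to 1 by the layer's forward pass.
      tokens = [3, 42, 1, 7, 99]

      ! The embedding layer must be the first layer in the network;
      ! positional=1: trigonometric encoding, positional=2: absolute,
      ! omitted: no positional term is added.
      net = network([ &
        embedding(sequence_length=5, vocab_size=100, model_dimension=16, positional=1), &
        flatten(), &
        dense(10) &
      ])

      ! predict accepts the integer token array directly through the
      ! predict_1d_int specific added in this series.
      prediction = net % predict(tokens)
      print *, shape(prediction)
    end program embedding_usage_sketch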