diff --git a/CMakeLists.txt b/CMakeLists.txt
index 906bdd0d..beb3389d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,6 +43,8 @@ add_library(neural-fortran
   src/nf/nf_layer_submodule.f90
   src/nf/nf_linear2d_layer.f90
   src/nf/nf_linear2d_layer_submodule.f90
+  src/nf/nf_embedding_layer.f90
+  src/nf/nf_embedding_layer_submodule.f90
   src/nf/nf_loss.f90
   src/nf/nf_loss_submodule.f90
   src/nf/nf_maxpool2d_layer.f90
diff --git a/README.md b/README.md
index 9fe3fab0..e94296a3 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass |
 |------------|------------------|------------------------|----------------------|--------------|---------------|
 | Input | `input` | n/a | 1, 2, 3 | n/a | n/a |
+| Embedding | `embedding` | n/a | 2 | ✅ | ✅ |
 | Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ |
 | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
 | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
diff --git a/src/nf.f90 b/src/nf.f90
index d089c4ac..181183e7 100644
--- a/src/nf.f90
+++ b/src/nf.f90
@@ -6,13 +6,14 @@ module nf
     conv2d, &
     dense, &
     dropout, &
+    embedding, &
     flatten, &
     input, &
+    layernorm, &
     linear2d, &
     maxpool2d, &
     reshape, &
-    self_attention, &
-    layernorm
+    self_attention
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network
diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90
new file mode 100644
index 00000000..94a868a5
--- /dev/null
+++ b/src/nf/nf_embedding_layer.f90
@@ -0,0 +1,98 @@
+module nf_embedding_layer
+
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+
+  implicit none
+
+  private
+  public :: embedding_layer
+
+  type, extends(base_layer) :: embedding_layer
+    !! Embedding layer
+    !! A trainable lookup table for integer token indices
+    !! drawn from a dictionary of size `vocab_size`.
+    !! The layer converts a sequence of indices into a table of shape
+    !! (`sequence_length`, `model_dimension`).
+    integer :: sequence_length, vocab_size, model_dimension
+    integer :: positional
+
+    real, allocatable :: weights(:, :)
+    real, allocatable :: output(:, :)
+    real, allocatable :: dw(:, :) ! weight gradients
+
+  contains
+
+    procedure :: backward
+    procedure :: forward
+    procedure :: positional_trigonometric
+    procedure :: positional_absolute
+    procedure :: init
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: get_gradients
+    procedure :: set_params
+
+  end type embedding_layer
+
+  interface embedding_layer
+    module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res)
+      integer, intent(in) :: vocab_size, model_dimension
+      integer, optional :: positional
+      type(embedding_layer) :: res
+    end function embedding_layer_cons
+  end interface embedding_layer
+
+  interface
+    pure module subroutine forward(self, input)
+      !! Look up vectors in the dictionary by their indices
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: input(:)
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      !! Accumulate the weight gradient at the `input` indices:
+      !! dw_i = dw_i + d_output_i
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: input(:)
+      real, intent(in) :: gradient(:, :)
+    end subroutine backward
+
+    pure module subroutine positional_trigonometric(self, pos)
+      !! Add trigonometric (sinusoidal, non-trainable) positional encoding to the embedding
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: pos
+    end subroutine positional_trigonometric
+
+    pure module subroutine positional_absolute(self, pos)
+      !! Add the absolute (0-based) position to the embedding
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: pos
+    end subroutine positional_absolute
+
+    module subroutine init(self, input_shape)
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: input_shape(:)
+    end subroutine init
+
+    pure module function get_num_params(self) result(num_params)
+      class(embedding_layer), intent(in) :: self
+      integer :: num_params
+    end function get_num_params
+
+    module function get_params(self) result(params)
+      class(embedding_layer), intent(in), target :: self
+      real, allocatable :: params(:)
+    end function get_params
+
+    module function get_gradients(self) result(gradients)
+      class(embedding_layer), intent(in), target :: self
+      real, allocatable :: gradients(:)
+    end function get_gradients
+
+    module subroutine set_params(self, params)
+      class(embedding_layer), intent(in out) :: self
+      real, intent(in), target :: params(:)
+    end subroutine set_params
+  end interface
+end module nf_embedding_layer
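For orientation, a minimal standalone sketch of driving the bare `embedding_layer` type follows (it mirrors `test/test_embedding_layer.f90` added below; the vocabulary size, sequence length, and token values are illustrative only):

  program embedding_sketch
    use nf_embedding_layer, only: embedding_layer
    implicit none
    type(embedding_layer) :: emb
    integer :: tokens(3) = [2, 1, 3]   ! token indices into a 4-entry vocabulary

    emb = embedding_layer(vocab_size=4, model_dimension=2)
    call emb % init([3])               ! input_shape(1) is the sequence length
    call emb % forward(tokens)         ! emb % output now has shape (3, 2)
    print *, emb % output(1, :)        ! the lookup-table row selected by token index 2
  end program embedding_sketch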
diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90
new file mode 100644
index 00000000..83992b22
--- /dev/null
+++ b/src/nf/nf_embedding_layer_submodule.f90
@@ -0,0 +1,137 @@
+#define NONE 0
+#define TRIGONOMETRIC 1
+#define ABSOLUTE 2
+
+submodule(nf_embedding_layer) nf_embedding_layer_submodule
+  use nf_base_layer, only: base_layer
+  implicit none
+contains
+  module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res)
+    integer, intent(in) :: vocab_size, model_dimension
+    integer, optional :: positional
+    type(embedding_layer) :: res
+
+    res % vocab_size = vocab_size
+    res % model_dimension = model_dimension
+    if (.not. present(positional)) then
+      res % positional = NONE
+    else
+      res % positional = positional
+    end if
+  end function embedding_layer_cons
+
+  module subroutine init(self, input_shape)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: input_shape(:)
+
+    self % sequence_length = input_shape(1)
+
+    allocate(self % output(self % sequence_length, self % model_dimension))
+
+    allocate(self % weights(self % vocab_size, self % model_dimension))
+    self % weights = 0.1
+
+    allocate(self % dw(self % vocab_size, self % model_dimension))
+    self % dw = 0.0
+  end subroutine init
+
+  pure module subroutine forward(self, input)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: input(:)
+    integer :: i, index
+
+    do concurrent(i = 1: self % sequence_length)
+      index = input(i)
+      if (index > size(self % weights, 1)) then
+        index = 1
+      elseif (index == 0) then
+        index = 1
+      end if
+
+      self % output(i, :) = self % weights(index, :)
+
+      if (self % positional == TRIGONOMETRIC) then
+        call self % positional_trigonometric(i)
+      elseif (self % positional == ABSOLUTE) then
+        call self % positional_absolute(i)
+      end if
+    end do
+  end subroutine forward
+
+  pure module subroutine backward(self, input, gradient)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: input(:)
+    real, intent(in) :: gradient(:, :)
+    integer :: i
+
+    do concurrent(i = 1: self % sequence_length)
+      self % dw(input(i), :) = self % dw(input(i), :) + gradient(i, :)
+    end do
+  end subroutine backward
+
+  pure module subroutine positional_trigonometric(self, pos)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: pos
+    integer :: i
+    real :: theta
+
+    do concurrent(i = 1: floor(real(self % model_dimension) / 2))
+      theta = (pos - 1) / 10000 ** (real(2 * (i-1)) / self % model_dimension)
+      self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta)
+      self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta)
+    end do
+  end subroutine positional_trigonometric
+
+  pure module subroutine positional_absolute(self, pos)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: pos
+    integer :: i
+
+    do concurrent(i = 1: self % model_dimension)
+      self % output(pos, i) = self % output(pos, i) + pos - 1
+    end do
+  end subroutine positional_absolute
+
+  pure module function get_num_params(self) result(num_params)
+    class(embedding_layer), intent(in) :: self
+    integer :: num_params
+    num_params = self % vocab_size * self % model_dimension
+  end function get_num_params
+
+  module function get_params(self) result(params)
+    class(embedding_layer), intent(in), target :: self
+    real, allocatable :: params(:)
+    real, pointer :: w_(:) => null()
+
+    w_(1: product(shape(self % weights))) => self % weights
+    params = w_
+  end function get_params
+
+  module function get_gradients(self) result(gradients)
+    class(embedding_layer), intent(in), target :: self
+    real, allocatable :: gradients(:)
+    real, pointer :: dw_(:) => null()
+
+    dw_(1: product(shape(self % dw))) => self % dw
+    gradients = dw_
+  end function get_gradients
+
+  module subroutine set_params(self, params)
+    class(embedding_layer), intent(in out) :: self
+    real, intent(in), target :: params(:)
+
+    real, pointer :: p_(:,:) => null()
+
+    ! check if the number of parameters is correct
+    if (size(params) /= self % get_num_params()) then
+      error stop 'Error: number of parameters does not match'
+    end if
+
+    associate(n => self % vocab_size * self % model_dimension)
+      ! reshape the flat parameter vector into the weights array
+      p_(1:self % vocab_size, 1:self % model_dimension) => params(1 : n)
+      self % weights = p_
+    end associate
+
+  end subroutine set_params
+end submodule nf_embedding_layer_submodule
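A note on the two positional options above, since `TRIGONOMETRIC` (1) and `ABSOLUTE` (2) appear only as preprocessor defines: the trigonometric variant adds the usual sinusoidal encoding with theta = (pos - 1) / 10000**(2*(i - 1) / model_dimension), placing sin(theta) in odd columns (2*i - 1) and cos(theta) in even columns (2*i), while the absolute variant adds the 0-based position (pos - 1) to every column. A quick worked check against the first weight column used in `test_positional_trigonometric` below (stored values 0.3, 0.1, 0.5 at positions 1, 2, 3):

  pos = 1, i = 1:  theta = 0  ->  0.3 + sin(0) = 0.3       (and 0.3 + cos(0) = 1.3 in column 2)
  pos = 2, i = 1:  theta = 1  ->  0.1 + sin(1) ≈ 0.941471
  pos = 3, i = 1:  theta = 2  ->  0.5 + sin(2) ≈ 1.409297

which matches the expected values in the test program.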
diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90
index ce00b6bc..a13eee4a 100644
--- a/src/nf/nf_layer_constructors.f90
+++ b/src/nf/nf_layer_constructors.f90
@@ -18,6 +18,7 @@ module nf_layer_constructors
     maxpool2d, &
     reshape, &
     self_attention, &
+    embedding, &
     layernorm

   interface input
@@ -233,6 +234,23 @@ module function self_attention(num_heads) result(res)
         !! Resulting layer instance
     end function self_attention

+    module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res)
+      !! Embedding layer constructor.
+      !!
+      !! This layer takes integer token indices from a dictionary as network input.
+      !! It works as a trainable lookup table that converts each index into a vector.
+      !! An embedding layer must be the first layer in a network.
+      integer, intent(in) :: sequence_length
+        !! Maximum length of the input token sequence
+      integer, intent(in) :: vocab_size
+        !! Size of the token vocabulary (dictionary)
+      integer, intent(in) :: model_dimension
+        !! Size of the output embedding vectors
+      integer, optional, intent(in) :: positional
+        !! Positional encoding to add: 0 (none, default), 1 (trigonometric), 2 (absolute)
+      type(layer) :: res
+    end function embedding
+
     module function layernorm() result(res)
       !! Layer Normalization
       !! ((x − mean(x)) / sqrt(variance(x) + eps) * gamma + beta
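To make the intended call pattern concrete, here is a hedged sketch of a network whose first layer is the new embedding; the downstream layers chosen here (`linear2d`, `flatten`, `dense`) are only an assumed example stack, and which adjacencies are actually supported is governed by the compatibility table in the README, not by this sketch:

  program embedding_network_sketch
    use nf, only: network, embedding, linear2d, flatten, dense
    implicit none
    type(network) :: net

    ! Token sequences of length 3 over a 5-entry vocabulary, mapped to
    ! 4-dimensional embeddings with trigonometric positional encoding.
    net = network([ &
      embedding(sequence_length=3, vocab_size=5, model_dimension=4, positional=1), &
      linear2d(8), &
      flatten(), &
      dense(2) &
    ])
  end program embedding_network_sketch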
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index 5c2e8893..cd829f59 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -12,6 +12,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_embedding_layer, only: embedding_layer
   use nf_layernorm_layer, only: layernorm_layer
   use nf_activation, only: activation_function, relu, sigmoid

@@ -172,6 +173,7 @@ module function linear2d(out_features) result(res)

   end function linear2d

+
   module function self_attention(num_heads) result(res)
     integer, intent(in) :: num_heads
     type(layer) :: res
@@ -180,9 +182,26 @@ module function self_attention(num_heads) result(res)
     allocate(res % p, source=self_attention_layer(num_heads))
   end function self_attention

-  module function layernorm() result(res)
+
+  module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res)
+    integer, intent(in) :: sequence_length, vocab_size, model_dimension
+    integer, optional, intent(in) :: positional
     type(layer) :: res
+    type(embedding_layer) :: embedding_layer_instance
+
+    embedding_layer_instance = embedding_layer(vocab_size, model_dimension, positional)
+    call embedding_layer_instance % init([sequence_length])
+    res % name = 'embedding'
+    res % layer_shape = [sequence_length, model_dimension]
+    res % input_layer_shape = [integer ::]
+    allocate(res % p, source=embedding_layer_instance)
+    res % initialized = .true.
+
+  end function embedding
+
+  module function layernorm() result(res)
+    type(layer) :: res
     res % name = 'layernorm'
     allocate(res % p, source=layernorm_layer())
   end function layernorm
diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90
index a3b42434..afc64746 100644
--- a/src/nf/nf_layer_submodule.f90
+++ b/src/nf/nf_layer_submodule.f90
@@ -12,6 +12,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_embedding_layer, only: embedding_layer
   use nf_layernorm_layer, only: layernorm_layer
   use nf_optimizers, only: optimizer_base_type

@@ -61,6 +62,8 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(embedding_layer)
+            call this_layer % backward(prev_layer % output, gradient)
           type is(layernorm_layer)
             call this_layer % backward(prev_layer % output, gradient)
         end select
@@ -83,6 +86,8 @@ pure module subroutine backward_2d(self, previous, gradient)
         select type(prev_layer => previous % p)
           type is(input2d_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(embedding_layer)
+            call this_layer % backward(prev_layer % output, gradient)
           type is(linear2d_layer)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
@@ -96,6 +101,8 @@ pure module subroutine backward_2d(self, previous, gradient)
         select type(prev_layer => previous % p)
          type is(input2d_layer)
            call this_layer % backward(prev_layer % output, gradient)
+          type is(embedding_layer)
+            call this_layer % backward(prev_layer % output, gradient)
          type is(linear2d_layer)
            call this_layer % backward(prev_layer % output, gradient)
          type is(self_attention_layer)
@@ -271,6 +278,8 @@ module subroutine forward(self, input)
         select type(prev_layer => input % p)
           type is(input2d_layer)
             call this_layer % forward(prev_layer % output)
+          type is(embedding_layer)
+            call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
@@ -285,6 +294,8 @@ module subroutine forward(self, input)
         select type(prev_layer => input % p)
           type is(input2d_layer)
             call this_layer % forward(prev_layer % output)
+          type is(embedding_layer)
+            call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
@@ -338,6 +349,8 @@ pure module subroutine get_output_2d(self, output)

       type is(input2d_layer)
         allocate(output, source=this_layer % output)
+      type is(embedding_layer)
+        allocate(output, source=this_layer % output)
       type is(linear2d_layer)
         allocate(output, source=this_layer % output)
       type is(self_attention_layer)
@@ -460,6 +473,8 @@ elemental module function get_num_params(self) result(num_params)
         num_params = this_layer % get_num_params()
       type is (self_attention_layer)
         num_params = this_layer % get_num_params()
+      type is (embedding_layer)
+        num_params = this_layer % get_num_params()
       type is (layernorm_layer)
         num_params = this_layer % get_num_params()
       class default
@@ -495,6 +510,8 @@ module function get_params(self) result(params)
         params = this_layer % get_params()
       type is (self_attention_layer)
         params = this_layer % get_params()
+      type is (embedding_layer)
+        params = this_layer % get_params()
       type is (layernorm_layer)
         params = this_layer % get_params()
       class default
@@ -530,6 +547,8 @@ module function get_gradients(self) result(gradients)
        gradients = this_layer % get_gradients()
      type is (self_attention_layer)
        gradients = this_layer % get_gradients()
+      type is (embedding_layer)
+        gradients = this_layer % get_gradients()
      type is (layernorm_layer)
        gradients = this_layer % get_gradients()
      class default
@@ -589,6 +608,8 @@ module subroutine set_params(self, params)

       type is (self_attention_layer)
         call this_layer % set_params(params)

+      type is (embedding_layer)
+        call this_layer % set_params(params)

       type is (layernorm_layer)
         call this_layer % set_params(params)
diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90
index 5916924e..53d3c07d 100644
--- a/src/nf/nf_network.f90
+++ b/src/nf/nf_network.f90
@@ -32,17 +32,19 @@ module nf_network

     procedure, private :: evaluate_batch_1d
     procedure, private :: forward_1d
+    procedure, private :: forward_1d_int
     procedure, private :: forward_2d
     procedure, private :: forward_3d
     procedure, private :: predict_1d
+    procedure, private :: predict_1d_int
     procedure, private :: predict_2d
     procedure, private :: predict_3d
     procedure, private :: predict_batch_1d
     procedure, private :: predict_batch_3d

     generic :: evaluate => evaluate_batch_1d
-    generic :: forward => forward_1d, forward_2d, forward_3d
-    generic :: predict => predict_1d, predict_2d, predict_3d
+    generic :: forward => forward_1d, forward_1d_int, forward_2d, forward_3d
+    generic :: predict => predict_1d, predict_1d_int, predict_2d, predict_3d
     generic :: predict_batch => predict_batch_1d, predict_batch_3d

   end type network
@@ -95,6 +97,12 @@
         !! 1-d input data
     end subroutine forward_1d

+    module subroutine forward_1d_int(self, input)
+      !! Same as `forward_1d`, but accepts `integer` input.
+      class(network), intent(in out) :: self
+      integer, intent(in) :: input(:)
+    end subroutine forward_1d_int
+
     module subroutine forward_2d(self, input)
       !! Apply a forward pass through the network.
       !!
@@ -137,6 +145,13 @@ module function predict_1d(self, input) result(res)
         !! Output of the network
     end function predict_1d

+    module function predict_1d_int(self, input) result(res)
+      !! Same as `predict_1d`, but accepts `integer` input.
+      class(network), intent(in out) :: self
+      integer, intent(in) :: input(:)
+      real, allocatable :: res(:)
+    end function predict_1d_int
+
     module function predict_2d(self, input) result(res)
       !! Return the output of the network given the input 1-d array.
       class(network), intent(in out) :: self
diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90
index a6b7657c..efd98327 100644
--- a/src/nf/nf_network_submodule.f90
+++ b/src/nf/nf_network_submodule.f90
@@ -11,6 +11,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_embedding_layer, only: embedding_layer
   use nf_layernorm_layer, only: layernorm_layer
   use nf_layer, only: layer
   use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape
@@ -47,7 +48,7 @@ module function network_from_layers(layers) result(res)
       error stop 'Error: A network must have at least 2 layers.'

     ! The first layer must be an input layer
-    if (.not. layers(1) % name == 'input') &
+    if (.not. layers(1) % name == 'input' .and. .not. layers(1) % name == 'embedding') &
       error stop 'Error: First layer in the network must be an input layer.'

     !TODO Ensure that the layers are in allowed sequence:
@@ -210,8 +211,9 @@ module subroutine forward_1d(self, input)
     integer :: n

     ! Set the input array into the input layer
-    select type(input_layer => self % layers(1) % p); type is(input1d_layer)
-      call input_layer % set(input)
+    select type(input_layer => self % layers(1) % p)
+      type is(input1d_layer)
+        call input_layer % set(input)
     end select

     do n = 2, size(self % layers)
@@ -220,6 +222,21 @@ module subroutine forward_1d(self, input)

   end subroutine forward_1d

+  module subroutine forward_1d_int(self, input)
+    class(network), intent(in out) :: self
+    integer, intent(in) :: input(:)
+    integer :: n
+
+    select type(input_layer => self % layers(1) % p)
+      type is(embedding_layer)
+        call input_layer % forward(input)
+    end select
+
+    do n = 2, size(self % layers)
+      call self % layers(n) % forward(self % layers(n - 1))
+    end do
+
+  end subroutine forward_1d_int

   module subroutine forward_2d(self, input)
     class(network), intent(in out) :: self
@@ -284,6 +301,31 @@ module function predict_1d(self, input) result(res)

   end function predict_1d

+  module function predict_1d_int(self, input) result(res)
+    class(network), intent(in out) :: self
+    integer, intent(in) :: input(:)
+    real, allocatable :: res(:)
+    integer :: n, num_layers
+
+    num_layers = size(self % layers)
+
+    call self % set_training_mode(.false.)
+    call self % forward(input)
+    call self % set_training_mode(.true.)
+
+    select type(output_layer => self % layers(num_layers) % p)
+      type is(dense_layer)
+        res = output_layer % output
+      type is(dropout_layer)
+        res = output_layer % output
+      type is(flatten_layer)
+        res = output_layer % output
+      class default
+        error stop 'network % output not implemented for ' // &
+          trim(self % layers(num_layers) % name) // ' layer'
+    end select
+
+  end function predict_1d_int

   module function predict_2d(self, input) result(res)
     class(network), intent(in out) :: self
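With `forward_1d_int` and `predict_1d_int` bound to the existing `forward` and `predict` generics, integer token sequences can be passed directly to a network that starts with an embedding layer. A sketch, reusing the hypothetical `net` from the earlier example:

    integer :: tokens(3)
    real, allocatable :: y(:)

    tokens = [4, 1, 2]             ! indices into the 5-entry vocabulary
    call net % forward(tokens)     ! dispatches to forward_1d_int
    y = net % predict(tokens)      ! dispatches to predict_1d_int

As with `predict_1d`, `predict_1d_int` only returns a result when the final layer is a `dense_layer`, `dropout_layer`, or `flatten_layer`; any other output layer hits the `error stop` branch.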
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 46d349c1..6982ddd4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -12,6 +12,7 @@ foreach(execid
   insert_flatten
   reshape_layer
   multihead_attention_layer
+  embedding_layer
   layernorm
   dense_network
   get_set_network_params
diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90
new file mode 100644
index 00000000..99b7fca6
--- /dev/null
+++ b/test/test_embedding_layer.f90
@@ -0,0 +1,133 @@
+program test_embedding_layer
+  use iso_fortran_env, only: stderr => error_unit
+  use nf_embedding_layer, only: embedding_layer
+  use nf_layer, only: layer
+  use nf_layer_constructors, only: embedding_constructor => embedding
+  implicit none
+
+  logical :: ok = .true.
+  integer :: sample_input(3) = [2, 1, 3]
+
+  call test_simple(ok, sample_input)
+  call test_positional_trigonometric(ok, sample_input)
+  call test_positional_absolute(ok, sample_input)
+
+  if (ok) then
+    print '(a)', 'test_embedding_layer: All tests passed.'
+  else
+    write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.'
+    error stop 1
+  end if
+
+contains
+  subroutine test_simple(ok, sample_input)
+    logical, intent(in out) :: ok
+    integer, intent(in) :: sample_input(:)
+
+    real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2])
+    real :: output_flat(6)
+    real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6])
+    real :: dw_flat(8)
+    real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8])
+    type(embedding_layer) :: embedding
+
+    embedding = embedding_layer(vocab_size=4, model_dimension=2)
+    call embedding % init([3])
+    embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2])
+
+    call embedding % forward(sample_input)
+
+    output_flat = reshape(embedding % output, [6])
+    if (.not. all(output_flat.eq.expected_output_flat)) then
+      ok = .false.
+      write(stderr, '(a)') 'forward returned incorrect values.. failed'
+    end if
+
+    call embedding % backward(sample_input, sample_gradient)
+    dw_flat = reshape(embedding % dw, shape(dw_flat))
+    if (.not. all(dw_flat.eq.expected_dw_flat)) then
+      ok = .false.
+      write(stderr, '(a)') 'backward returned incorrect dw values.. failed'
+    end if
+  end subroutine test_simple
+
+  subroutine test_positional_trigonometric(ok, sample_input)
+    logical, intent(in out) :: ok
+    integer, intent(in) :: sample_input(:)
+
+    real :: output_flat(12)
+    real :: expected_output_flat(12) = reshape([&
+        0.3, 0.941471, 1.4092975,&
+        1.3, 0.64030236, 0.08385316,&
+        0.3, 0.10999984, 0.51999867,&
+        1.3, 1.09995, 1.4998&
+    ], [12])
+    type(embedding_layer) :: embedding
+
+    real :: theta
+    integer :: i, pos
+
+    embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=1)
+    call embedding % init([3])
+    embedding % weights = reshape([&
+        0.1, 0.3, 0.5, 0.7, 0.2,&
+        0.1, 0.3, 0.5, 0.7, 0.2,&
+        0.1, 0.3, 0.5, 0.7, 0.2,&
+        0.1, 0.3, 0.5, 0.7, 0.2&
+    ], [5, 4])
+
+    call embedding % forward(sample_input)
+
+    output_flat = reshape(embedding % output, [12])
+    if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then
+      ok = .false.
+      write(stderr, '(a)') 'trigonometric positional encoding returned incorrect values.. failed'
+    end if
+  end subroutine test_positional_trigonometric
+
+  subroutine test_positional_absolute(ok, sample_input)
+    logical, intent(in out) :: ok
+    integer, intent(in) :: sample_input(:)
+
+    real :: output_flat(12)
+    real :: expected_output_flat(12) = reshape([&
+        0.3, 1.1, 2.5,&
+        0.3, 1.1, 2.5,&
+        0.3, 1.1, 2.5,&
+        0.3, 1.1, 2.5&
+    ], [12])
+    type(embedding_layer) :: embedding
+
+    real :: theta
+    integer :: i, pos
+
+    embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=2)
+    call embedding % init([3])
+    embedding % weights = reshape([&
+        0.1, 0.3, 0.5, 0.7, 0.2,&
+        0.1, 0.3, 0.5, 0.7, 0.2,&
+        0.1, 0.3, 0.5, 0.7, 0.2,&
+        0.1, 0.3, 0.5, 0.7, 0.2&
+    ], [5, 4])
+
+    call embedding % forward(sample_input)
+
+    output_flat = reshape(embedding % output, [12])
+    if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then
+      ok = .false.
+      write(stderr, '(a)') 'absolute positional encoding returned incorrect values.. failed'
+    end if
+  end subroutine test_positional_absolute
+
+  subroutine test_embedding_constructor(ok, sample_input)
+    logical, intent(in out) :: ok
+    integer, intent(in) :: sample_input(:)
+
+    type(layer) :: embedding_constructed
+
+    embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4)
+    embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=0)
+    embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=1)
+    embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=2)
+  end subroutine test_embedding_constructor
+end program test_embedding_layer
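Assuming the usual neural-fortran test registration (the CMake `foreach(execid ...)` loop above, and fpm's automatic discovery of programs under `test/`), the new test can be run with something like `ctest -R embedding` from the CMake build directory, or `fpm test test_embedding_layer` when building with fpm.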