From 1c54cf0af2e4f866e53df574675b291f901fc8f7 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 11:53:14 +0400 Subject: [PATCH 01/14] embedding_layer: initial forward implementation --- src/nf/nf_embedding_layer.f90 | 77 ++++++++++++++++++++++++ src/nf/nf_embedding_submodule.f90 | 97 +++++++++++++++++++++++++++++++ test/test_embedding_layer.f90 | 14 +++++ 3 files changed, 188 insertions(+) create mode 100644 src/nf/nf_embedding_layer.f90 create mode 100644 src/nf/nf_embedding_submodule.f90 create mode 100644 test/test_embedding_layer.f90 diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 new file mode 100644 index 00000000..9074ece8 --- /dev/null +++ b/src/nf/nf_embedding_layer.f90 @@ -0,0 +1,77 @@ +module nf_embedding_layer + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: embedding_layer + + type, extends(base_layer) :: embedding_layer + integer :: sequence_length, vocab_size, model_dimension + + real, allocatable :: weights(:, :) + real, allocatable :: output(:, :) + real, allocatable :: gradient(:, :) ! input gradient + real, allocatable :: dw(:, :) ! weight gradients + + contains + + procedure :: backward + procedure :: forward + procedure :: init + procedure :: get_num_params + procedure :: get_params + procedure :: get_gradients + procedure :: set_params + + end type embedding_layer + + interface embedding_layer + module function embedding_layer_cons(& + sequence_length, vocab_size, model_dimension& + ) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(embedding_layer) :: res + end function embedding_layer_cons + end interface embedding_layer + + interface + pure module subroutine forward(self, input) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + real, intent(in) :: gradient(:) + end subroutine backward + + module subroutine init(self, input_shape) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module function get_num_params(self) result(num_params) + class(embedding_layer), intent(in) :: self + integer :: num_params + end function get_num_params + + module function get_params(self) result(params) + class(embedding_layer), intent(in), target :: self + real, allocatable :: params(:) + end function get_params + + module function get_gradients(self) result(gradients) + class(embedding_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients + + module subroutine set_params(self, params) + class(embedding_layer), intent(in out) :: self + real, intent(in), target :: params(:) + end subroutine set_params + end interface +end module nf_embedding_layer diff --git a/src/nf/nf_embedding_submodule.f90 b/src/nf/nf_embedding_submodule.f90 new file mode 100644 index 00000000..2d360107 --- /dev/null +++ b/src/nf/nf_embedding_submodule.f90 @@ -0,0 +1,97 @@ +submodule(nf_embedding_layer) nf_embedding_layer_submodule + use nf_base_layer, only: base_layer + implicit none +contains + module function embedding_layer_cons(& + sequence_length, vocab_size, model_dimension& + ) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(embedding_layer) :: res + + res % vocab_size = vocab_size + res % 
model_dimension = model_dimension + res % sequence_length = sequence_length + end function embedding_layer_cons + + module subroutine init(self, input_shape) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + allocate(self % output(self % sequence_length, self % model_dimension)) + allocate(self % gradient(self % sequence_length, self % vocab_size)) + + allocate(self % weights(self % vocab_size, self % model_dimension)) + self % weights = 0.1 + + allocate(self % dw(self % vocab_size, self % model_dimension)) + self % dw = 0.0 + end subroutine init + + pure module subroutine forward(self, input) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + integer :: i + + do concurrent(i = 1: self % sequence_length) + self % output(i, :) = self % weights(input(i), :) + end do + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: input(:) + real, intent(in) :: gradient(:) + real :: db(self % model_dimension) + real :: dw(self % vocab_size, self % model_dimension) + integer :: i + end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(embedding_layer), intent(in) :: self + integer :: num_params + + ! Number of weigths times number of biases + num_params = self % vocab_size * self % model_dimension + self % model_dimension + + end function get_num_params + + + module function get_params(self) result(params) + class(embedding_layer), intent(in), target :: self + real, allocatable :: params(:) + real, pointer :: w_(:) => null() + + w_(1: product(shape(self % weights))) => self % weights + params = [w_] + end function get_params + + + module function get_gradients(self) result(gradients) + class(embedding_layer), intent(in), target :: self + real, allocatable :: gradients(:) + real, pointer :: dw_(:) => null() + + dw_(1: product(shape(self % dw))) => self % dw + gradients = [dw_] + end function get_gradients + + + module subroutine set_params(self, params) + class(embedding_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + real, pointer :: p_(:,:) => null() + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + associate(n => self % vocab_size * self % model_dimension) + ! reshape the weights + p_(1:self % vocab_size, 1:self % model_dimension) => params(1 : n) + self % weights = p_ + end associate + + end subroutine set_params +end submodule nf_embedding_layer_submodule diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 new file mode 100644 index 00000000..1f0692e0 --- /dev/null +++ b/test/test_embedding_layer.f90 @@ -0,0 +1,14 @@ +program test_embedding_layer + use iso_fortran_env, only: stderr => error_unit + use nf_embedding_layer, only: embedding_layer + implicit none + + logical :: ok = .true. 
+ integer :: sample_input(3) = [2, 1, 3] + type(embedding_layer) :: embedding + + embedding = embedding_layer(sequence_length=3, vocab_size=4, model_dimension=2) + call embedding % init([0]) + embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) + call embedding % forward(sample_input) +end program test_embedding_layer \ No newline at end of file From d4731a1fe930510e7b7576b7fc27aba016d091aa Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 17:12:46 +0400 Subject: [PATCH 02/14] embedding_layer: implementation of embedding layer --- src/nf/nf_embedding_layer.f90 | 16 ++++++++++----- src/nf/nf_embedding_submodule.f90 | 33 +++++++++++++++---------------- test/test_embedding_layer.f90 | 32 +++++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index 9074ece8..0cb4f923 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -9,6 +9,11 @@ module nf_embedding_layer public :: embedding_layer type, extends(base_layer) :: embedding_layer + !! Embedding Layer + !! Stores inputs as a trainable lookup table. Inputs are + !! integer indicies in a dictionary of `vocab_size`. + !! This layer converts them into a table of shape + !! (`sequence_length`, `model_dimension`) integer :: sequence_length, vocab_size, model_dimension real, allocatable :: weights(:, :) @@ -29,24 +34,25 @@ module nf_embedding_layer end type embedding_layer interface embedding_layer - module function embedding_layer_cons(& - sequence_length, vocab_size, model_dimension& - ) result(res) - integer, intent(in) :: sequence_length, vocab_size, model_dimension + module function embedding_layer_cons(vocab_size, model_dimension) result(res) + integer, intent(in) :: vocab_size, model_dimension type(embedding_layer) :: res end function embedding_layer_cons end interface embedding_layer interface pure module subroutine forward(self, input) + !! Get vectors by indicis in the dictionary class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) end subroutine forward pure module subroutine backward(self, input, gradient) + !! Update gradient at `input` indices + !! 
dw_i = W_i + d_output_i class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) - real, intent(in) :: gradient(:) + real, intent(in) :: gradient(:, :) end subroutine backward module subroutine init(self, input_shape) diff --git a/src/nf/nf_embedding_submodule.f90 b/src/nf/nf_embedding_submodule.f90 index 2d360107..fd7d6bc6 100644 --- a/src/nf/nf_embedding_submodule.f90 +++ b/src/nf/nf_embedding_submodule.f90 @@ -2,21 +2,20 @@ use nf_base_layer, only: base_layer implicit none contains - module function embedding_layer_cons(& - sequence_length, vocab_size, model_dimension& - ) result(res) - integer, intent(in) :: sequence_length, vocab_size, model_dimension + module function embedding_layer_cons(vocab_size, model_dimension) result(res) + integer, intent(in) :: vocab_size, model_dimension type(embedding_layer) :: res res % vocab_size = vocab_size res % model_dimension = model_dimension - res % sequence_length = sequence_length end function embedding_layer_cons module subroutine init(self, input_shape) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) + self % sequence_length = input_shape(1) + allocate(self % output(self % sequence_length, self % model_dimension)) allocate(self % gradient(self % sequence_length, self % vocab_size)) @@ -30,32 +29,34 @@ end subroutine init pure module subroutine forward(self, input) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) - integer :: i + integer :: i, index do concurrent(i = 1: self % sequence_length) - self % output(i, :) = self % weights(input(i), :) + index = input(i) + if (index > size(self % weights, 1)) then + index = 1 + end if + self % output(i, :) = self % weights(index, :) end do end subroutine forward pure module subroutine backward(self, input, gradient) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input(:) - real, intent(in) :: gradient(:) - real :: db(self % model_dimension) - real :: dw(self % vocab_size, self % model_dimension) + real, intent(in) :: gradient(:, :) integer :: i + + do concurrent(i = 1: self % sequence_length) + self % dw(input(i), :) = self % dw(input(i), :) + gradient(i, :) + end do end subroutine backward pure module function get_num_params(self) result(num_params) class(embedding_layer), intent(in) :: self integer :: num_params - - ! Number of weigths times number of biases - num_params = self % vocab_size * self % model_dimension + self % model_dimension - + num_params = self % vocab_size * self % model_dimension end function get_num_params - module function get_params(self) result(params) class(embedding_layer), intent(in), target :: self real, allocatable :: params(:) @@ -65,7 +66,6 @@ module function get_params(self) result(params) params = [w_] end function get_params - module function get_gradients(self) result(gradients) class(embedding_layer), intent(in), target :: self real, allocatable :: gradients(:) @@ -75,7 +75,6 @@ module function get_gradients(self) result(gradients) gradients = [dw_] end function get_gradients - module subroutine set_params(self, params) class(embedding_layer), intent(in out) :: self real, intent(in), target :: params(:) diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 1f0692e0..205ce3ca 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -5,10 +5,36 @@ program test_embedding_layer logical :: ok = .true. 
integer :: sample_input(3) = [2, 1, 3] + real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) + real :: output_flat(6) + real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) + real :: dw_flat(8) + real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8]) type(embedding_layer) :: embedding - embedding = embedding_layer(sequence_length=3, vocab_size=4, model_dimension=2) - call embedding % init([0]) + embedding = embedding_layer(vocab_size=4, model_dimension=2) + call embedding % init([3]) embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) + call embedding % forward(sample_input) -end program test_embedding_layer \ No newline at end of file + + output_flat = reshape(embedding % output, [6]) + if (.not. all(output_flat.eq.expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. failed' + end if + + call embedding % backward(sample_input, sample_gradient) + dw_flat = reshape(embedding % dw, shape(dw_flat)) + if (.not. all(dw_flat.eq.expected_dw_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw values.. failed' + end if + + if (ok) then + print '(a)', 'test_embedding_layer: All tests passed.' + else + write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.' + stop 1 + end if +end program test_embedding_layer From e6b54de5cfe1b6bd547af9737d2523cfebc2d258 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 19 Feb 2025 23:43:29 +0400 Subject: [PATCH 03/14] embedding_layer: remove gradient attribute --- src/nf/nf_embedding_layer.f90 | 1 - ..._embedding_submodule.f90 => nf_embedding_layer_submodule.f90} | 1 - 2 files changed, 2 deletions(-) rename src/nf/{nf_embedding_submodule.f90 => nf_embedding_layer_submodule.f90} (97%) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index 0cb4f923..e4f3e10e 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -18,7 +18,6 @@ module nf_embedding_layer real, allocatable :: weights(:, :) real, allocatable :: output(:, :) - real, allocatable :: gradient(:, :) ! input gradient real, allocatable :: dw(:, :) ! 
weight gradients contains diff --git a/src/nf/nf_embedding_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 similarity index 97% rename from src/nf/nf_embedding_submodule.f90 rename to src/nf/nf_embedding_layer_submodule.f90 index fd7d6bc6..5ae5421a 100644 --- a/src/nf/nf_embedding_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -17,7 +17,6 @@ module subroutine init(self, input_shape) self % sequence_length = input_shape(1) allocate(self % output(self % sequence_length, self % model_dimension)) - allocate(self % gradient(self % sequence_length, self % vocab_size)) allocate(self % weights(self % vocab_size, self % model_dimension)) self % weights = 0.1 From 48efd075cfa65ca7eea4a997984bc526f4d749b8 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 01:00:29 +0400 Subject: [PATCH 04/14] embedding_layer: guard against zeros --- src/nf/nf_embedding_layer_submodule.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index 5ae5421a..5e38f22e 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -34,6 +34,8 @@ pure module subroutine forward(self, input) index = input(i) if (index > size(self % weights, 1)) then index = 1 + elseif (index == 0) then + index = 1 end if self % output(i, :) = self % weights(index, :) end do From 4cdd2e52580373ece52d57bd7c9c681dc3345a08 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 01:01:00 +0400 Subject: [PATCH 05/14] embedding_layer: plumbing --- src/nf.f90 | 3 ++- src/nf/nf_layer_constructors.f90 | 26 +++++++++++++--------- src/nf/nf_layer_constructors_submodule.f90 | 18 +++++++++++++++ src/nf/nf_layer_submodule.f90 | 21 +++++++++++++++++ src/nf/nf_network_submodule.f90 | 10 ++++++--- 5 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/nf.f90 b/src/nf.f90 index 39f67ea3..be392154 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -11,7 +11,8 @@ module nf linear2d, & maxpool2d, & reshape, & - self_attention + self_attention, & + embedding use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index db60cf0f..fb99f502 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -17,7 +17,8 @@ module nf_layer_constructors linear2d, & maxpool2d, & reshape, & - self_attention + self_attention, & + embedding interface input @@ -222,15 +223,20 @@ module function linear2d(out_features) result(res) !! Resulting layer instance end function linear2d - module function self_attention(num_heads) result(res) - !! Rank-2 (sequence_length, out_features) self attention constructor. - !! sequence_length and model_dimension are determined at layer initialization, based on the - !! output shape of the previous layer. - integer, intent(in) :: num_heads - !! Number of attention heads - type(layer) :: res - !! Resulting layer instance - end function self_attention + module function self_attention(num_heads) result(res) + !! Rank-2 (sequence_length, out_features) self attention constructor. + !! sequence_length and model_dimension are determined at layer initialization, based on the + !! output shape of the previous layer. + integer, intent(in) :: num_heads + !! Number of attention heads + type(layer) :: res + !! 
Resulting layer instance + end function self_attention + + module function embedding(sequence_length, vocab_size, model_dimension) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(layer) :: res + end function embedding end interface diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 9e5322c1..a10f4c81 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -12,6 +12,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer use nf_activation, only: activation_function, relu, sigmoid implicit none @@ -171,6 +172,7 @@ module function linear2d(out_features) result(res) end function linear2d + module function self_attention(num_heads) result(res) integer, intent(in) :: num_heads type(layer) :: res @@ -179,4 +181,20 @@ module function self_attention(num_heads) result(res) allocate(res % p, source=self_attention_layer(num_heads)) end function self_attention + + module function embedding(sequence_length, vocab_size, model_dimension) result(res) + integer, intent(in) :: sequence_length, vocab_size, model_dimension + type(layer) :: res + type(embedding_layer) :: embedding_layer_instance + + embedding_layer_instance = embedding_layer(vocab_size, model_dimension) + call embedding_layer_instance % init([sequence_length]) + res % name = 'embedding' + res % layer_shape = [sequence_length, model_dimension] + res % input_layer_shape = [integer ::] + allocate(res % p, source=embedding_layer_instance) + res % initialized = .true. + + end function embedding + end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ecdeb41d..a11fb66d 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -12,6 +12,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer use nf_optimizers, only: optimizer_base_type contains @@ -60,6 +61,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -80,6 +83,8 @@ pure module subroutine backward_2d(self, previous, gradient) select type(prev_layer => previous % p) type is(input2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) @@ -91,6 +96,8 @@ pure module subroutine backward_2d(self, previous, gradient) select type(prev_layer => previous % p) type is(input2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(embedding_layer) + call this_layer % backward(prev_layer % output, gradient) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) @@ -254,6 +261,8 @@ module subroutine forward(self, input) select type(prev_layer => input % p) type is(input2d_layer) call this_layer % forward(prev_layer % output) + type 
is(embedding_layer) + call this_layer % forward(prev_layer % output) type is(linear2d_layer) call this_layer % forward(prev_layer % output) type is(self_attention_layer) @@ -266,6 +275,8 @@ module subroutine forward(self, input) select type(prev_layer => input % p) type is(input2d_layer) call this_layer % forward(prev_layer % output) + type is(embedding_layer) + call this_layer % forward(prev_layer % output) type is(linear2d_layer) call this_layer % forward(prev_layer % output) type is(self_attention_layer) @@ -307,6 +318,8 @@ pure module subroutine get_output_2d(self, output) type is(input2d_layer) allocate(output, source=this_layer % output) + type is(embedding_layer) + allocate(output, source=this_layer % output) type is(linear2d_layer) allocate(output, source=this_layer % output) type is(self_attention_layer) @@ -425,6 +438,8 @@ elemental module function get_num_params(self) result(num_params) num_params = this_layer % get_num_params() type is (self_attention_layer) num_params = this_layer % get_num_params() + type is (embedding_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -458,6 +473,8 @@ module function get_params(self) result(params) params = this_layer % get_params() type is (self_attention_layer) params = this_layer % get_params() + type is (embedding_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -491,6 +508,8 @@ module function get_gradients(self) result(gradients) gradients = this_layer % get_gradients() type is (self_attention_layer) gradients = this_layer % get_gradients() + type is (embedding_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -548,6 +567,8 @@ module subroutine set_params(self, params) type is (self_attention_layer) call this_layer % set_params(params) + type is (embedding_layer) + call this_layer % set_params(params) type is (maxpool2d_layer) ! No parameters to set. diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index f344c5c5..5cd79f5a 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -11,6 +11,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_embedding_layer, only: embedding_layer use nf_layer, only: layer use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape use nf_loss, only: quadratic @@ -46,7 +47,7 @@ module function network_from_layers(layers) result(res) error stop 'Error: A network must have at least 2 layers.' ! The first layer must be an input layer - if (.not. layers(1) % name == 'input') & + if (.not. layers(1) % name == 'input' .and. .not. layers(1) % name == 'embedding') & error stop 'Error: First layer in the network must be an input layer.' !TODO Ensure that the layers are in allowed sequence: @@ -207,8 +208,11 @@ module subroutine forward_1d(self, input) integer :: n ! 
Set the input array into the input layer - select type(input_layer => self % layers(1) % p); type is(input1d_layer) - call input_layer % set(input) + select type(input_layer => self % layers(1) % p) + type is(input1d_layer) + call input_layer % set(input) + type is(embedding_layer) + call input_layer % forward(nint(input)) end select do n = 2, size(self % layers) From 6bfea21c8a1e4ddb02e00875e3f793ecc54e16b7 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:11:50 +0400 Subject: [PATCH 06/14] embedding_layer: positional encoding --- src/nf/nf_embedding_layer.f90 | 10 +++++++++- src/nf/nf_embedding_layer_submodule.f90 | 26 ++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index e4f3e10e..fa2d6076 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -15,6 +15,7 @@ module nf_embedding_layer !! This layer converts them into a table of shape !! (`sequence_length`, `model_dimension`) integer :: sequence_length, vocab_size, model_dimension + logical :: positional real, allocatable :: weights(:, :) real, allocatable :: output(:, :) @@ -24,6 +25,7 @@ module nf_embedding_layer procedure :: backward procedure :: forward + procedure :: positional_encoding procedure :: init procedure :: get_num_params procedure :: get_params @@ -33,8 +35,9 @@ module nf_embedding_layer end type embedding_layer interface embedding_layer - module function embedding_layer_cons(vocab_size, model_dimension) result(res) + module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension + logical, optional :: positional type(embedding_layer) :: res end function embedding_layer_cons end interface embedding_layer @@ -54,6 +57,11 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: gradient(:, :) end subroutine backward + pure module subroutine positional_encoding(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + end subroutine positional_encoding + module subroutine init(self, input_shape) class(embedding_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index 5e38f22e..a81e15e3 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -2,12 +2,18 @@ use nf_base_layer, only: base_layer implicit none contains - module function embedding_layer_cons(vocab_size, model_dimension) result(res) + module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension + logical, optional :: positional type(embedding_layer) :: res res % vocab_size = vocab_size res % model_dimension = model_dimension + if (.not. present(positional)) then + res % positional = .false. 
+ else + res % positional = positional + end if end function embedding_layer_cons module subroutine init(self, input_shape) @@ -37,7 +43,12 @@ pure module subroutine forward(self, input) elseif (index == 0) then index = 1 end if + self % output(i, :) = self % weights(index, :) + + if (self % positional) then + call self % positional_encoding(i) + end if end do end subroutine forward @@ -52,6 +63,19 @@ pure module subroutine backward(self, input, gradient) end do end subroutine backward + pure module subroutine positional_encoding(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + integer :: i + real :: theta + + do concurrent(i = 1: floor(real(self % model_dimension) / 2)) + theta = (pos - 1) / 10000 ** (real(2 * (i-1)) / self % model_dimension) + self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta) + self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta) + end do + end subroutine positional_encoding + pure module function get_num_params(self) result(num_params) class(embedding_layer), intent(in) :: self integer :: num_params From f1b414c155e2af4f6c2cc065418d357122c2afa4 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:13:36 +0400 Subject: [PATCH 07/14] embedding_layer: update tests --- test/test_embedding_layer.f90 | 93 +++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 25 deletions(-) diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 205ce3ca..028df4f7 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -4,32 +4,9 @@ program test_embedding_layer implicit none logical :: ok = .true. - integer :: sample_input(3) = [2, 1, 3] - real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) - real :: output_flat(6) - real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) - real :: dw_flat(8) - real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8]) - type(embedding_layer) :: embedding - - embedding = embedding_layer(vocab_size=4, model_dimension=2) - call embedding % init([3]) - embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) - - call embedding % forward(sample_input) - - output_flat = reshape(embedding % output, [6]) - if (.not. all(output_flat.eq.expected_output_flat)) then - ok = .false. - write(stderr, '(a)') 'forward returned incorrect values.. failed' - end if - call embedding % backward(sample_input, sample_gradient) - dw_flat = reshape(embedding % dw, shape(dw_flat)) - if (.not. all(dw_flat.eq.expected_dw_flat)) then - ok = .false. - write(stderr, '(a)') 'backward returned incorrect dw values.. failed' - end if + call test_simple(ok) + call test_positional(ok) if (ok) then print '(a)', 'test_embedding_layer: All tests passed.' @@ -37,4 +14,70 @@ program test_embedding_layer write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.' 
stop 1 end if + +contains + subroutine test_simple(ok) + logical, intent(in out) :: ok + + integer :: sample_input(3) = [2, 1, 3] + real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) + real :: output_flat(6) + real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) + real :: dw_flat(8) + real :: expected_dw_flat(8) = reshape([0.2, 0.1, 0.3, 0., 0.6, 0.4, 0.6, 0.], [8]) + type(embedding_layer) :: embedding + + embedding = embedding_layer(vocab_size=4, model_dimension=2) + call embedding % init([3]) + embedding % weights = reshape([0.1, 0.3, 0.5, 0.7, 0.2, 0.4, 0.6, 0.8], [4, 2]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [6]) + if (.not. all(output_flat.eq.expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. failed' + end if + + call embedding % backward(sample_input, sample_gradient) + dw_flat = reshape(embedding % dw, shape(dw_flat)) + if (.not. all(dw_flat.eq.expected_dw_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw values.. failed' + end if + end subroutine test_simple + + subroutine test_positional(ok) + logical, intent(in out) :: ok + + integer :: sample_input(3) = [2, 1, 3] + real :: output_flat(12) + real :: expected_output_flat(12) = reshape([& + 0.3, 0.941471, 1.4092975,& + 1.3, 0.64030236, 0.08385316,& + 0.3, 0.10999984, 0.51999867,& + 1.3, 1.09995, 1.4998& + ], [12]) + type(embedding_layer) :: embedding + + real :: theta + integer :: i, pos + + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=.true.) + call embedding % init([3]) + embedding % weights = reshape([& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2& + ], [5, 4]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [12]) + if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then + ok = .false. + write(stderr, '(a)') 'positional encoding returned incorrect values.. failed' + end if + end subroutine test_positional end program test_embedding_layer From 10e54d0f0b3f78104567902e17685741f7750ebf Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:19:20 +0400 Subject: [PATCH 08/14] embedding_layer: add more comments --- src/nf/nf_embedding_layer.f90 | 1 + src/nf/nf_layer_constructors.f90 | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index fa2d6076..8c066ceb 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -58,6 +58,7 @@ pure module subroutine backward(self, input, gradient) end subroutine backward pure module subroutine positional_encoding(self, pos) + !! Sum embedding with positional info (trigonometric, not trianable) class(embedding_layer), intent(in out) :: self integer, intent(in) :: pos end subroutine positional_encoding diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index fb99f502..63411eec 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -234,6 +234,14 @@ module function self_attention(num_heads) result(res) end function self_attention module function embedding(sequence_length, vocab_size, model_dimension) result(res) + !! Embedding layer constructor. + !! + !! This layer is for inputting token indices from the dictionary to the network. + !! 
Works as a trainable lookup table that converts each index into a vector. + !! Embedding layer must be the first layer in a network. + !! `sequence_length`: max len of input sequence + !! `vocab_size`: length of token vocabulary + !! `model_dimension`: size of target embeddings integer, intent(in) :: sequence_length, vocab_size, model_dimension type(layer) :: res end function embedding From 0165642b996ef395102337636120919d6ff4d9c6 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Thu, 20 Feb 2025 13:22:46 +0400 Subject: [PATCH 09/14] embedding_layer: update cmake --- CMakeLists.txt | 2 ++ test/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1bf2231..562d2e1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(neural-fortran src/nf/nf_layer_submodule.f90 src/nf/nf_linear2d_layer.f90 src/nf/nf_linear2d_layer_submodule.f90 + src/nf/nf_embedding_layer.f90 + src/nf/nf_embedding_layer_submodule.f90 src/nf/nf_loss.f90 src/nf/nf_loss_submodule.f90 src/nf/nf_maxpool2d_layer.f90 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 741e9930..e7bbacc0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,6 +12,7 @@ foreach(execid insert_flatten reshape_layer multihead_attention_layer + embedding_layer dense_network get_set_network_params conv2d_network From dd0ab319167912200e696ae232c35307efd6ab4d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Fri, 21 Feb 2025 22:07:58 +0400 Subject: [PATCH 10/14] embedding_layer: pr fixes --- src/nf/nf_embedding_layer_submodule.f90 | 4 ++-- test/test_embedding_layer.f90 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index a81e15e3..eedcc2ac 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -88,7 +88,7 @@ module function get_params(self) result(params) real, pointer :: w_(:) => null() w_(1: product(shape(self % weights))) => self % weights - params = [w_] + params = w_ end function get_params module function get_gradients(self) result(gradients) @@ -97,7 +97,7 @@ module function get_gradients(self) result(gradients) real, pointer :: dw_(:) => null() dw_(1: product(shape(self % dw))) => self % dw - gradients = [dw_] + gradients = dw_ end function get_gradients module subroutine set_params(self, params) diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 028df4f7..4a7b47b6 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -12,7 +12,7 @@ program test_embedding_layer print '(a)', 'test_embedding_layer: All tests passed.' else write(stderr, '(a)') 'test_embedding_layer: One or more tests failed.' - stop 1 + error stop 1 end if contains From 074bcd1edd70569082e730edd743570366aca51e Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 23 Feb 2025 11:44:23 +0400 Subject: [PATCH 11/14] embedding_layer: add absolute positional encoding --- src/nf/nf_embedding_layer.f90 | 17 +++++++--- src/nf/nf_embedding_layer_submodule.f90 | 28 +++++++++++---- test/test_embedding_layer.f90 | 45 ++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/nf/nf_embedding_layer.f90 b/src/nf/nf_embedding_layer.f90 index 8c066ceb..94a868a5 100644 --- a/src/nf/nf_embedding_layer.f90 +++ b/src/nf/nf_embedding_layer.f90 @@ -15,7 +15,7 @@ module nf_embedding_layer !! This layer converts them into a table of shape !! 
(`sequence_length`, `model_dimension`) integer :: sequence_length, vocab_size, model_dimension - logical :: positional + integer :: positional real, allocatable :: weights(:, :) real, allocatable :: output(:, :) @@ -25,7 +25,8 @@ module nf_embedding_layer procedure :: backward procedure :: forward - procedure :: positional_encoding + procedure :: positional_trigonometric + procedure :: positional_absolute procedure :: init procedure :: get_num_params procedure :: get_params @@ -37,7 +38,7 @@ module nf_embedding_layer interface embedding_layer module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension - logical, optional :: positional + integer, optional :: positional type(embedding_layer) :: res end function embedding_layer_cons end interface embedding_layer @@ -57,11 +58,17 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: gradient(:, :) end subroutine backward - pure module subroutine positional_encoding(self, pos) + pure module subroutine positional_trigonometric(self, pos) !! Sum embedding with positional info (trigonometric, not trianable) class(embedding_layer), intent(in out) :: self integer, intent(in) :: pos - end subroutine positional_encoding + end subroutine positional_trigonometric + + pure module subroutine positional_absolute(self, pos) + !! Sum embedding with absolute position + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + end subroutine positional_absolute module subroutine init(self, input_shape) class(embedding_layer), intent(in out) :: self diff --git a/src/nf/nf_embedding_layer_submodule.f90 b/src/nf/nf_embedding_layer_submodule.f90 index eedcc2ac..83992b22 100644 --- a/src/nf/nf_embedding_layer_submodule.f90 +++ b/src/nf/nf_embedding_layer_submodule.f90 @@ -1,16 +1,20 @@ +#define NONE 0 +#define TRIGONOMETRIC 1 +#define ABSOLUTE 2 + submodule(nf_embedding_layer) nf_embedding_layer_submodule use nf_base_layer, only: base_layer implicit none contains module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res) integer, intent(in) :: vocab_size, model_dimension - logical, optional :: positional + integer, optional :: positional type(embedding_layer) :: res res % vocab_size = vocab_size res % model_dimension = model_dimension if (.not. present(positional)) then - res % positional = .false. 
+ res % positional = NONE else res % positional = positional end if @@ -46,8 +50,10 @@ pure module subroutine forward(self, input) self % output(i, :) = self % weights(index, :) - if (self % positional) then - call self % positional_encoding(i) + if (self % positional == TRIGONOMETRIC) then + call self % positional_trigonometric(i) + elseif (self % positional == ABSOLUTE) then + call self % positional_absolute(i) end if end do end subroutine forward @@ -63,7 +69,7 @@ pure module subroutine backward(self, input, gradient) end do end subroutine backward - pure module subroutine positional_encoding(self, pos) + pure module subroutine positional_trigonometric(self, pos) class(embedding_layer), intent(in out) :: self integer, intent(in) :: pos integer :: i @@ -74,7 +80,17 @@ pure module subroutine positional_encoding(self, pos) self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta) self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta) end do - end subroutine positional_encoding + end subroutine positional_trigonometric + + pure module subroutine positional_absolute(self, pos) + class(embedding_layer), intent(in out) :: self + integer, intent(in) :: pos + integer :: i + + do concurrent(i = 1: self % model_dimension) + self % output(pos, i) = self % output(pos, i) + pos - 1 + end do + end subroutine positional_absolute pure module function get_num_params(self) result(num_params) class(embedding_layer), intent(in) :: self diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 4a7b47b6..8f135db8 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -6,7 +6,8 @@ program test_embedding_layer logical :: ok = .true. call test_simple(ok) - call test_positional(ok) + call test_positional_trigonometric(ok) + call test_positional_absolute(ok) if (ok) then print '(a)', 'test_embedding_layer: All tests passed.' @@ -47,7 +48,7 @@ subroutine test_simple(ok) end if end subroutine test_simple - subroutine test_positional(ok) + subroutine test_positional_trigonometric(ok) logical, intent(in out) :: ok integer :: sample_input(3) = [2, 1, 3] @@ -63,7 +64,7 @@ subroutine test_positional(ok) real :: theta integer :: i, pos - embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=.true.) + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=1) call embedding % init([3]) embedding % weights = reshape([& 0.1, 0.3, 0.5, 0.7, 0.2,& @@ -77,7 +78,41 @@ subroutine test_positional(ok) output_flat = reshape(embedding % output, [12]) if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then ok = .false. - write(stderr, '(a)') 'positional encoding returned incorrect values.. failed' + write(stderr, '(a)') 'trigonometric positional encoding returned incorrect values.. 
failed' end if - end subroutine test_positional + end subroutine test_positional_trigonometric + + subroutine test_positional_absolute(ok) + logical, intent(in out) :: ok + + integer :: sample_input(3) = [2, 1, 3] + real :: output_flat(12) + real :: expected_output_flat(12) = reshape([& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5,& + 0.3, 1.1, 2.5& + ], [12]) + type(embedding_layer) :: embedding + + real :: theta + integer :: i, pos + + embedding = embedding_layer(vocab_size=5, model_dimension=4, positional=2) + call embedding % init([3]) + embedding % weights = reshape([& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2,& + 0.1, 0.3, 0.5, 0.7, 0.2& + ], [5, 4]) + + call embedding % forward(sample_input) + + output_flat = reshape(embedding % output, [12]) + if (.not. all(abs(output_flat - expected_output_flat) <= (1e-06 + 1e-05 * abs(expected_output_flat)))) then + ok = .false. + write(stderr, '(a)') 'absolute positional encoding returned incorrect values.. failed' + end if + end subroutine test_positional_absolute end program test_embedding_layer From 73799bd5a4693b6be0e990a3db5b3f80134d6344 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 23 Feb 2025 14:49:32 +0400 Subject: [PATCH 12/14] embedding_layer: update constructor and tests --- src/nf/nf_layer_constructors.f90 | 3 +- src/nf/nf_layer_constructors_submodule.f90 | 5 ++-- test/test_embedding_layer.f90 | 33 ++++++++++++++++------ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 63411eec..0f17cc8d 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -233,7 +233,7 @@ module function self_attention(num_heads) result(res) !! Resulting layer instance end function self_attention - module function embedding(sequence_length, vocab_size, model_dimension) result(res) + module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res) !! Embedding layer constructor. !! !! This layer is for inputting token indices from the dictionary to the network. @@ -243,6 +243,7 @@ module function embedding(sequence_length, vocab_size, model_dimension) result(r !! `vocab_size`: length of token vocabulary !! 
`model_dimension`: size of target embeddings integer, intent(in) :: sequence_length, vocab_size, model_dimension + integer, optional, intent(in) :: positional type(layer) :: res end function embedding diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index a10f4c81..329f7d3d 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -182,12 +182,13 @@ module function self_attention(num_heads) result(res) end function self_attention - module function embedding(sequence_length, vocab_size, model_dimension) result(res) + module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res) integer, intent(in) :: sequence_length, vocab_size, model_dimension + integer, optional, intent(in) :: positional type(layer) :: res type(embedding_layer) :: embedding_layer_instance - embedding_layer_instance = embedding_layer(vocab_size, model_dimension) + embedding_layer_instance = embedding_layer(vocab_size, model_dimension, positional) call embedding_layer_instance % init([sequence_length]) res % name = 'embedding' res % layer_shape = [sequence_length, model_dimension] diff --git a/test/test_embedding_layer.f90 b/test/test_embedding_layer.f90 index 8f135db8..99b7fca6 100644 --- a/test/test_embedding_layer.f90 +++ b/test/test_embedding_layer.f90 @@ -1,13 +1,16 @@ program test_embedding_layer use iso_fortran_env, only: stderr => error_unit use nf_embedding_layer, only: embedding_layer + use nf_layer, only: layer + use nf_layer_constructors, only: embedding_constructor => embedding implicit none logical :: ok = .true. + integer :: sample_input(3) = [2, 1, 3] - call test_simple(ok) - call test_positional_trigonometric(ok) - call test_positional_absolute(ok) + call test_simple(ok, sample_input) + call test_positional_trigonometric(ok, sample_input) + call test_positional_absolute(ok, sample_input) if (ok) then print '(a)', 'test_embedding_layer: All tests passed.' @@ -17,10 +20,10 @@ program test_embedding_layer end if contains - subroutine test_simple(ok) + subroutine test_simple(ok, sample_input) logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) - integer :: sample_input(3) = [2, 1, 3] real :: sample_gradient(3, 2) = reshape([0.1, 0.2, 0.3, 0.4, 0.6, 0.6], [3, 2]) real :: output_flat(6) real :: expected_output_flat(6) = reshape([0.3, 0.1, 0.5, 0.4, 0.2, 0.6], [6]) @@ -48,10 +51,10 @@ subroutine test_simple(ok) end if end subroutine test_simple - subroutine test_positional_trigonometric(ok) + subroutine test_positional_trigonometric(ok, sample_input) logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) - integer :: sample_input(3) = [2, 1, 3] real :: output_flat(12) real :: expected_output_flat(12) = reshape([& 0.3, 0.941471, 1.4092975,& @@ -82,10 +85,10 @@ subroutine test_positional_trigonometric(ok) end if end subroutine test_positional_trigonometric - subroutine test_positional_absolute(ok) + subroutine test_positional_absolute(ok, sample_input) logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) - integer :: sample_input(3) = [2, 1, 3] real :: output_flat(12) real :: expected_output_flat(12) = reshape([& 0.3, 1.1, 2.5,& @@ -115,4 +118,16 @@ subroutine test_positional_absolute(ok) write(stderr, '(a)') 'absolute positional encoding returned incorrect values.. 
failed' end if end subroutine test_positional_absolute + + subroutine test_embedding_constructor(ok, sample_input) + logical, intent(in out) :: ok + integer, intent(in) :: sample_input(:) + + type(layer) :: embedding_constructed + + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=0) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=1) + embedding_constructed = embedding_constructor(sequence_length=3, vocab_size=5, model_dimension=4, positional=2) + end subroutine test_embedding_constructor end program test_embedding_layer From fe02beb724e7b400dcc59ed341031b1499db421b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 23 Feb 2025 21:12:10 +0400 Subject: [PATCH 13/14] embedding_layer: make integer input generics --- src/nf/nf_network.f90 | 19 +++++++++++++-- src/nf/nf_network_submodule.f90 | 42 +++++++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index 5916924e..53d3c07d 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -32,17 +32,19 @@ module nf_network procedure, private :: evaluate_batch_1d procedure, private :: forward_1d + procedure, private :: forward_1d_int procedure, private :: forward_2d procedure, private :: forward_3d procedure, private :: predict_1d + procedure, private :: predict_1d_int procedure, private :: predict_2d procedure, private :: predict_3d procedure, private :: predict_batch_1d procedure, private :: predict_batch_3d generic :: evaluate => evaluate_batch_1d - generic :: forward => forward_1d, forward_2d, forward_3d - generic :: predict => predict_1d, predict_2d, predict_3d + generic :: forward => forward_1d, forward_1d_int, forward_2d, forward_3d + generic :: predict => predict_1d, predict_1d_int, predict_2d, predict_3d generic :: predict_batch => predict_batch_1d, predict_batch_3d end type network @@ -95,6 +97,12 @@ module subroutine forward_1d(self, input) !! 1-d input data end subroutine forward_1d + module subroutine forward_1d_int(self, input) + !! Same as `forward_1d` except `integer` + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + end subroutine forward_1d_int + module subroutine forward_2d(self, input) !! Apply a forward pass through the network. !! @@ -137,6 +145,13 @@ module function predict_1d(self, input) result(res) !! Output of the network end function predict_1d + module function predict_1d_int(self, input) result(res) + !! Same as `predict_1d` except `integer` + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + real, allocatable :: res(:) + end function predict_1d_int + module function predict_2d(self, input) result(res) !! Return the output of the network given the input 1-d array. 
class(network), intent(in out) :: self diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 5cd79f5a..5816db12 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -211,8 +211,6 @@ module subroutine forward_1d(self, input) select type(input_layer => self % layers(1) % p) type is(input1d_layer) call input_layer % set(input) - type is(embedding_layer) - call input_layer % forward(nint(input)) end select do n = 2, size(self % layers) @@ -221,6 +219,21 @@ module subroutine forward_1d(self, input) end subroutine forward_1d + module subroutine forward_1d_int(self, input) + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + integer :: n + + select type(input_layer => self % layers(1) % p) + type is(embedding_layer) + call input_layer % forward(input) + end select + + do n = 2, size(self % layers) + call self % layers(n) % forward(self % layers(n - 1)) + end do + + end subroutine forward_1d_int module subroutine forward_2d(self, input) class(network), intent(in out) :: self @@ -285,6 +298,31 @@ module function predict_1d(self, input) result(res) end function predict_1d + module function predict_1d_int(self, input) result(res) + class(network), intent(in out) :: self + integer, intent(in) :: input(:) + real, allocatable :: res(:) + integer :: n, num_layers + + num_layers = size(self % layers) + + call self % set_training_mode(.false.) + call self % forward(input) + call self % set_training_mode(.true.) + + select type(output_layer => self % layers(num_layers) % p) + type is(dense_layer) + res = output_layer % output + type is(dropout_layer) + res = output_layer % output + type is(flatten_layer) + res = output_layer % output + class default + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' + end select + + end function predict_1d_int module function predict_2d(self, input) result(res) class(network), intent(in out) :: self From e97be10a73d2d98a8c7bd790991cbee998ca7a79 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Tue, 4 Mar 2025 14:21:25 +0400 Subject: [PATCH 14/14] embedding_layer: update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9fe3fab0..e94296a3 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). | Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass | |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 2, 3 | n/a | n/a | +| Embedding | `embedding` | n/a | 2 | ✅ | ✅ | | Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ | | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ | | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
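
A minimal usage sketch, assuming the series above is applied as-is: the `embedding`
constructor signature (`sequence_length`, `vocab_size`, `model_dimension`, optional
`positional`) is taken from `nf_layer_constructors.f90`, and the integer overload of
`predict` from `nf_network.f90`; `positional=1` selects the trigonometric encoding
(theta = (pos - 1) / 10000 ** (2*(i-1)/model_dimension), as implemented in
`nf_embedding_layer_submodule.f90`) and `positional=2` the absolute one. The
downstream layers (`flatten`, `dense`), the layer sizes, and the token values are
illustrative placeholders, not a configuration exercised by these patches.

    program embedding_usage_sketch
      use nf, only: network, embedding, flatten, dense
      implicit none

      type(network) :: net
      integer :: tokens(5)
      real, allocatable :: prediction(:)

      ! Token indices into a vocabulary of 100 entries; indices of 0 or
      ! beyond vocab_size are clamped to 1 by the layer's forward pass.
      tokens = [3, 42, 1, 7, 99]

      ! The embedding layer must be the first layer in the network;
      ! positional=1: trigonometric encoding, positional=2: absolute,
      ! omitted: no positional term is added.
      net = network([ &
        embedding(sequence_length=5, vocab_size=100, model_dimension=16, positional=1), &
        flatten(), &
        dense(10) &
      ])

      ! predict accepts the integer token array directly through the
      ! predict_1d_int specific added in this series.
      prediction = net % predict(tokens)
      print *, shape(prediction)
    end program embedding_usage_sketch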