From 64e1b696abb5c7526cd7e34f40e20996f7db35e1 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 14:25:15 -0600 Subject: [PATCH 01/31] Prototyping RNN layer based on Dense The dimensions don't match, but let's start with something that compile. --- CMakeLists.txt | 2 + src/nf/nf_rnn_layer.f90 | 131 +++++++++++++++++++++++++++ src/nf/nf_rnn_layer_submodule.f90 | 143 ++++++++++++++++++++++++++++++ 3 files changed, 276 insertions(+) create mode 100644 src/nf/nf_rnn_layer.f90 create mode 100644 src/nf/nf_rnn_layer_submodule.f90 diff --git a/CMakeLists.txt b/CMakeLists.txt index 490f7ff1..91aad008 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,8 @@ add_library(neural-fortran src/nf/nf_random.f90 src/nf/nf_reshape_layer.f90 src/nf/nf_reshape_layer_submodule.f90 + src/nf/nf_rnn_layer.f90 + src/nf/nf_rnn_layer_submodule.f90 src/nf/io/nf_io_binary.f90 src/nf/io/nf_io_binary_submodule.f90 ) diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 new file mode 100644 index 00000000..e95a404f --- /dev/null +++ b/src/nf/nf_rnn_layer.f90 @@ -0,0 +1,131 @@ +module nf_rnn_layer + + !! This module provides the concrete dense layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: rnn_layer + + type, extends(base_layer) :: rnn_layer + + !! Concrete implementation of a dense (fully-connected) layer type + + integer :: input_size + integer :: output_size + + real, allocatable :: weights(:,:) + real, allocatable :: recurrent(:,:) + real, allocatable :: biases(:) + real, allocatable :: state(:) + real, allocatable :: z(:) ! matmul(x, w) + b + real, allocatable :: output(:) ! activation(z) + real, allocatable :: gradient(:) ! matmul(w, db) + real, allocatable :: dw(:,:) ! weight gradients + real, allocatable :: db(:) ! bias gradients + + class(activation_function), allocatable :: activation + + contains + + !procedure :: backward + !procedure :: forward + !procedure :: get_gradients + procedure :: get_num_params + !procedure :: get_params + procedure :: init + !procedure :: set_params + + end type rnn_layer + + interface rnn_layer + elemental module function rnn_layer_cons(output_size, activation) & + result(res) + !! This function returns the `dense_layer` instance. + integer, intent(in) :: output_size + !! Number of neurons in this layer + class(activation_function), intent(in) :: activation + !! Instance of the activation_function to use; + !! See nf_activation.f90 for available functions. + type(rnn_layer) :: res + !! dense_layer instance + end function rnn_layer_cons + end interface rnn_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward gradient descent pass. + !! Only weight and bias gradients are updated in this subroutine, + !! while the weights and biases themselves are untouched. + class(rnn_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + real, intent(in) :: gradient(:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Propagate forward the layer. + !! Calling this subroutine updates the values of a few data components + !! of `dense_layer` that are needed for the backward pass. + class(rnn_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! 
Input from the previous layer + end subroutine forward + + pure module function get_num_params(self) result(num_params) + !! Return the number of parameters in this layer. + class(rnn_layer), intent(in) :: self + !! Dense layer instance + integer :: num_params + !! Number of parameters in this layer + end function get_num_params + + pure module function get_params(self) result(params) + !! Return the parameters (weights and biases) of this layer. + !! The parameters are ordered as weights first, biases second. + class(rnn_layer), intent(in) :: self + !! Dense layer instance + real, allocatable :: params(:) + !! Parameters of this layer + end function get_params + + pure module function get_gradients(self) result(gradients) + !! Return the gradients of this layer. + !! The gradients are ordered as weights first, biases second. + class(rnn_layer), intent(in) :: self + !! Dense layer instance + real, allocatable :: gradients(:) + !! Gradients of this layer + end function get_gradients + + module subroutine set_params(self, params) + !! Set the parameters of this layer. + !! The parameters are ordered as weights first, biases second. + class(rnn_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: params(:) + !! Parameters of this layer + end subroutine set_params + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(rnn_layer), intent(in out) :: self + !! Dense layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + end subroutine init + + end interface + +end module nf_rnn_layer diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 new file mode 100644 index 00000000..91658d84 --- /dev/null +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -0,0 +1,143 @@ +submodule(nf_rnn_layer) nf_rnn_layer_submodule + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + use nf_random, only: random_normal + + implicit none + +contains + + elemental module function rnn_layer_cons(output_size, activation) & + result(res) + integer, intent(in) :: output_size + class(activation_function), intent(in) :: activation + type(rnn_layer) :: res + + res % output_size = output_size + res % activation_name = activation % get_name() + allocate( res % activation, source = activation ) + + end function rnn_layer_cons + + + pure module subroutine backward(self, input, gradient) + class(rnn_layer), intent(in out) :: self + real, intent(in) :: input(:) + real, intent(in) :: gradient(:) + real :: db(self % output_size) + real :: dw(self % input_size, self % output_size) + + db = gradient * self % activation % eval_prime(self % z) + dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)])) + self % gradient = matmul(self % weights, db) + self % dw = self % dw + dw + self % db = self % db + db + + end subroutine backward + + + pure module subroutine forward(self, input) + class(rnn_layer), intent(in out) :: self + real, intent(in) :: input(:) + + self % z = matmul(input, self % weights) + self % biases + self % output = self % activation % eval(self % z) + + end subroutine forward + + + pure module function get_num_params(self) result(num_params) + class(rnn_layer), intent(in) :: self + integer :: num_params + + ! 
Number of weigths times number of biases + num_params = self % input_size * self % output_size + self % output_size + + end function get_num_params + + + pure module function get_params(self) result(params) + class(rnn_layer), intent(in) :: self + real, allocatable :: params(:) + + params = [ & + pack(self % weights, .true.), & + pack(self % biases, .true.) & + ] + + end function get_params + + + pure module function get_gradients(self) result(gradients) + class(rnn_layer), intent(in) :: self + real, allocatable :: gradients(:) + + gradients = [ & + pack(self % dw, .true.), & + pack(self % db, .true.) & + ] + + end function get_gradients + + + module subroutine set_params(self, params) + class(rnn_layer), intent(in out) :: self + real, intent(in) :: params(:) + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + ! reshape the weights + self % weights = reshape( & + params(:self % input_size * self % output_size), & + [self % input_size, self % output_size] & + ) + + ! reshape the biases + self % biases = reshape( & + params(self % input_size * self % output_size + 1:), & + [self % output_size] & + ) + + end subroutine set_params + + + module subroutine init(self, input_shape) + class(rnn_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % input_size = input_shape(1) + + ! Weights are a 2-d array of shape previous layer size + ! times this layer size. + allocate(self % weights(self % input_size, self % output_size)) + call random_normal(self % weights) + self % weights = self % weights / self % input_size + + ! Broadcast weights to all other images, if any. + call co_broadcast(self % weights, 1) + + allocate(self % biases(self % output_size)) + self % biases = 0 + + allocate(self % output(self % output_size)) + self % output = 0 + + allocate(self % z(self % output_size)) + self % z = 0 + + allocate(self % dw(self % input_size, self % output_size)) + self % dw = 0 + + allocate(self % db(self % output_size)) + self % db = 0 + + allocate(self % gradient(self % output_size)) + self % gradient = 0 + + end subroutine init + +end submodule nf_rnn_layer_submodule From 8c119118a3126673bfd700988bfd1fa1d19ab16c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 14:28:21 -0600 Subject: [PATCH 02/31] Extenting uses --- src/nf/nf_layer_constructors.f90 | 2 +- src/nf/nf_layer_constructors_submodule.f90 | 1 + src/nf/nf_layer_submodule.f90 | 3 +++ src/nf/nf_network_submodule.f90 | 5 ++++- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 309be6e4..a46daa37 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, input, maxpool2d, reshape + public :: conv2d, dense, flatten, input, maxpool2d, reshape, rnn interface input diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 234b20b1..606ae39e 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -8,6 +8,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_rnn_layer, only: rnn_layer use nf_activation, only: activation_function, relu, sigmoid implicit none diff --git 
a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index c672581a..aa02449b 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -8,6 +8,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_rnn_layer, only: rnn_layer use nf_optimizers, only: optimizer_base_type contains @@ -292,6 +293,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 + type is (rnn_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 140c9226..89d8de6c 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -7,8 +7,11 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_rnn_layer, only: rnn_layer + use nf_io_hdf5, only: get_hdf5_dataset + use nf_keras, only: get_keras_h5_layers, keras_layer use nf_layer, only: layer - use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape + use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape, rnn use nf_loss, only: quadratic use nf_optimizers, only: optimizer_base_type, sgd use nf_parallel, only: tile_indices From adef7d75d0dadba045de7e3612d14831b5ad8a8e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 14:33:37 -0600 Subject: [PATCH 03/31] Reading coefficients from h5f model Note a hardcoded 'simple_rnn_cell_23' that must be resolved later. --- src/nf/nf_network_submodule.f90 | 204 +++++++++++++++++++++++++++++++- 1 file changed, 203 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 89d8de6c..8b618bce 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -96,7 +96,209 @@ module function network_from_layers(layers) result(res) end function network_from_layers - module subroutine backward(self, output, loss) + module function network_from_keras(filename) result(res) + character(*), intent(in) :: filename + type(network) :: res + type(keras_layer), allocatable :: keras_layers(:) + type(layer), allocatable :: layers(:) + character(:), allocatable :: layer_name + character(:), allocatable :: object_name + integer :: n + + keras_layers = get_keras_h5_layers(filename) + + allocate(layers(size(keras_layers))) + + do n = 1, size(layers) + + select case(keras_layers(n) % class) + + case('Conv2D') + + if (keras_layers(n) % kernel_size(1) & + /= keras_layers(n) % kernel_size(2)) & + error stop 'Non-square kernel in conv2d layer not supported.' + + layers(n) = conv2d( & + keras_layers(n) % filters, & + !FIXME add support for non-square kernel + keras_layers(n) % kernel_size(1), & + get_activation_by_name(keras_layers(n) % activation) & + ) + + case('Dense') + + layers(n) = dense( & + keras_layers(n) % units(1), & + get_activation_by_name(keras_layers(n) % activation) & + ) + + case('Flatten') + layers(n) = flatten() + + case('InputLayer') + if (size(keras_layers(n) % units) == 1) then + ! input1d + layers(n) = input(keras_layers(n) % units(1)) + else + ! 
input3d + layers(n) = input(keras_layers(n) % units) + end if + + case('MaxPooling2D') + + if (keras_layers(n) % pool_size(1) & + /= keras_layers(n) % pool_size(2)) & + error stop 'Non-square pool in maxpool2d layer not supported.' + + if (keras_layers(n) % strides(1) & + /= keras_layers(n) % strides(2)) & + error stop 'Unequal strides in maxpool2d layer are not supported.' + + layers(n) = maxpool2d( & + !FIXME add support for non-square pool and stride + keras_layers(n) % pool_size(1), & + keras_layers(n) % strides(1) & + ) + + case('Reshape') + layers(n) = reshape(keras_layers(n) % target_shape) + + case default + error stop 'This Keras layer is not supported' + + end select + + end do + + res = network(layers) + + ! Loop over layers and read weights and biases from the Keras h5 file + ! for each; currently only dense layers are implemented. + do n = 2, size(res % layers) + + layer_name = keras_layers(n) % name + + select type(this_layer => res % layers(n) % p) + + type is(conv2d_layer) + ! Read biases from file + object_name = '/model_weights/' // layer_name // '/' & + // layer_name // '/bias:0' + call get_hdf5_dataset(filename, object_name, this_layer % biases) + + ! Read weights from file + object_name = '/model_weights/' // layer_name // '/' & + // layer_name // '/kernel:0' + call get_hdf5_dataset(filename, object_name, this_layer % kernel) + + type is(dense_layer) + + ! Read biases from file + object_name = '/model_weights/' // layer_name // '/' & + // layer_name // '/bias:0' + call get_hdf5_dataset(filename, object_name, this_layer % biases) + + ! Read weights from file + object_name = '/model_weights/' // layer_name // '/' & + // layer_name // '/kernel:0' + call get_hdf5_dataset(filename, object_name, this_layer % weights) + + type is(flatten_layer) + ! Nothing to do + continue + + type is(maxpool2d_layer) + ! Nothing to do + continue + + type is(reshape3d_layer) + ! Nothing to do + continue + + type is(rnn_layer) + + ! Read biases from file + object_name = '/model_weights/' // layer_name // '/' & + // layer_name // '/simple_rnn_cell_23/bias:0' + call get_hdf5_dataset(filename, object_name, this_layer % biases) + + ! Read weights from file + object_name = '/model_weights/' // layer_name // '/' & + // layer_name // '/simple_rnn_cell_23/kernel:0' + call get_hdf5_dataset(filename, object_name, this_layer % weights) + + ! Read recurrent weights from file + object_name = '/model_weights/' // layer_name // '/' & + // layer_name // '/simple_rnn_cell_23/recurrent_kernel:0' + call get_hdf5_dataset(filename, object_name, this_layer % recurrent) + + class default + error stop 'Internal error in network_from_keras(); ' & + // 'mismatch in layer types between the Keras and ' & + // 'neural-fortran model layers.' + + end select + + end do + + end function network_from_keras + + + pure function get_activation_by_name(activation_name) result(res) + ! Workaround to get activation_function with some + ! hardcoded default parameters by its name. + ! Need this function since we get only activation name + ! from keras files. 
+ character(len=*), intent(in) :: activation_name + class(activation_function), allocatable :: res + + select case(trim(activation_name)) + case('elu') + allocate ( res, source = elu(alpha = 0.1) ) + + case('exponential') + allocate ( res, source = exponential() ) + + case('gaussian') + allocate ( res, source = gaussian() ) + + case('linear') + allocate ( res, source = linear() ) + + case('relu') + allocate ( res, source = relu() ) + + case('leaky_relu') + allocate ( res, source = leaky_relu(alpha = 0.1) ) + + case('sigmoid') + allocate ( res, source = sigmoid() ) + + case('softmax') + allocate ( res, source = softmax() ) + + case('softplus') + allocate ( res, source = softplus() ) + + case('step') + allocate ( res, source = step() ) + + case('tanh') + allocate ( res, source = tanhf() ) + + case('celu') + allocate ( res, source = celu() ) + + case default + error stop 'activation_name must be one of: ' // & + '"elu", "exponential", "gaussian", "linear", "relu", ' // & + '"leaky_relu", "sigmoid", "softmax", "softplus", "step", "tanh" or "celu".' + end select + + end function get_activation_by_name + + pure module subroutine backward(self, output, loss) class(network), intent(in out) :: self real, intent(in) :: output(:) class(loss_type), intent(in), optional :: loss From b51d66f7b909161732ef65e4fa623690457e85a5 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 15:23:45 -0600 Subject: [PATCH 04/31] feat: get_params() --- src/nf/nf_layer_submodule.f90 | 2 ++ src/nf/nf_rnn_layer.f90 | 2 +- src/nf/nf_rnn_layer_submodule.f90 | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index aa02449b..af3b7969 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -320,6 +320,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. + type is (rnn_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index e95a404f..218cb9be 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -37,7 +37,7 @@ module nf_rnn_layer !procedure :: forward !procedure :: get_gradients procedure :: get_num_params - !procedure :: get_params + procedure :: get_params procedure :: init !procedure :: set_params diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index 91658d84..2e7ac47c 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -63,6 +63,7 @@ pure module function get_params(self) result(params) params = [ & pack(self % weights, .true.), & + pack(self % recurrent, .true.), & pack(self % biases, .true.) 
& ] From a7975021ae86426e0746a3c8817c43863431d194 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 15:37:16 -0600 Subject: [PATCH 05/31] feat: set_params() --- src/nf/nf_rnn_layer.f90 | 2 +- src/nf/nf_rnn_layer_submodule.f90 | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index 218cb9be..dc3283d2 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -39,7 +39,7 @@ module nf_rnn_layer procedure :: get_num_params procedure :: get_params procedure :: init - !procedure :: set_params + procedure :: set_params end type rnn_layer diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index 2e7ac47c..048733da 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -85,6 +85,7 @@ end function get_gradients module subroutine set_params(self, params) class(rnn_layer), intent(in out) :: self real, intent(in) :: params(:) + integer :: first, last ! check if the number of parameters is correct if (size(params) /= self % get_num_params()) then @@ -92,14 +93,24 @@ module subroutine set_params(self, params) end if ! reshape the weights + last = self % input_size * self % output_size self % weights = reshape( & - params(:self % input_size * self % output_size), & + params(:last), & [self % input_size, self % output_size] & ) + ! reshape the recurrent weights + first = last + 1 + last = first + self % output_size * self % output_size + self % recurrent = reshape( & + params(first:last), & + [self % output_size, self % output_size] & + ) + ! reshape the biases + first = last + 1 self % biases = reshape( & - params(self % input_size * self % output_size + 1:), & + params(first:), & [self % output_size] & ) From ff1c3929244f7002a264c07244214784fe1c0e00 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 15:37:53 -0600 Subject: [PATCH 06/31] feat: get_num_params() --- src/nf/nf_rnn_layer_submodule.f90 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index 048733da..07bffcdc 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -52,7 +52,9 @@ pure module function get_num_params(self) result(num_params) integer :: num_params ! Number of weigths times number of biases - num_params = self % input_size * self % output_size + self % output_size + num_params = self % input_size * self % output_size & + + self % output_size * self % output_size & + + self % output_size end function get_num_params From f686950fec42ef5fc96bdc88818e007f55e8a2a0 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 15:46:04 -0600 Subject: [PATCH 07/31] Initializing recurrent kernel and states --- src/nf/nf_rnn_layer_submodule.f90 | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index 07bffcdc..55a5114c 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -131,9 +131,18 @@ module subroutine init(self, input_shape) call random_normal(self % weights) self % weights = self % weights / self % input_size + ! Recurrent weights are a 2-d square array of shape this layer size. + ! Each neuron is adjusted by each state times a recurrent weight. 
+ allocate(self % recurrent(self % output_size, self % output_size)) + call random_normal(self % recurrent) + self % recurrent = self % recurrent / self % output_size + ! Broadcast weights to all other images, if any. call co_broadcast(self % weights, 1) + allocate(self % state(self % output_size)) + self % state = 0 + allocate(self % biases(self % output_size)) self % biases = 0 From acf1afd9ba745fd751b561396423ab27d9016ea1 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 26 Oct 2023 15:53:45 -0600 Subject: [PATCH 08/31] feat: forward() --- src/nf/nf_rnn_layer.f90 | 2 +- src/nf/nf_rnn_layer_submodule.f90 | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index dc3283d2..86e33ca3 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -34,7 +34,7 @@ module nf_rnn_layer contains !procedure :: backward - !procedure :: forward + procedure :: forward !procedure :: get_gradients procedure :: get_num_params procedure :: get_params diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index 55a5114c..7d047e64 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -41,8 +41,11 @@ pure module subroutine forward(self, input) class(rnn_layer), intent(in out) :: self real, intent(in) :: input(:) - self % z = matmul(input, self % weights) + self % biases - self % output = self % activation % eval(self % z) + self % z = matmul(input, self % weights) & + + matmul(self % state, self % recurrent) & + + self % biases + self % state = self % activation % eval(self % z) + self % output = self % state end subroutine forward From 69fed32ef151e0325100c235b207df45d2212530 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 27 Oct 2023 14:58:50 -0600 Subject: [PATCH 09/31] More informative error messages --- src/nf/nf_layer_submodule.f90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index af3b7969..43ea84c1 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -296,7 +296,7 @@ elemental module function get_num_params(self) result(num_params) type is (rnn_layer) num_params = this_layer % get_num_params() class default - error stop 'Unknown layer type.' + error stop 'get_num_params() with unknown layer type.' end select end function get_num_params @@ -323,7 +323,7 @@ module function get_params(self) result(params) type is (rnn_layer) params = this_layer % get_params() class default - error stop 'Unknown layer type.' + error stop 'get_params() with unknown layer type.' end select end function get_params @@ -348,7 +348,7 @@ module function get_gradients(self) result(gradients) type is (reshape3d_layer) ! No gradients to get. class default - error stop 'Unknown layer type.' + error stop 'get_gradients() with unknown layer type.' end select end function get_gradients @@ -405,7 +405,7 @@ module subroutine set_params(self, params) // 'on a zero-parameter layer; nothing to do.' class default - error stop 'Unknown layer type.' + error stop 'set_params() with unknown layer type.' 
end select end subroutine set_params From fd24e16377d4afae48f1661d56375eb20caf4b41 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 27 Oct 2023 16:31:04 -0600 Subject: [PATCH 10/31] Minor adjustments on rnn_layer --- src/nf/nf_rnn_layer.f90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index 86e33ca3..36597ca0 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -22,8 +22,8 @@ module nf_rnn_layer real, allocatable :: weights(:,:) real, allocatable :: recurrent(:,:) real, allocatable :: biases(:) - real, allocatable :: state(:) real, allocatable :: z(:) ! matmul(x, w) + b + real, allocatable :: state(:) real, allocatable :: output(:) ! activation(z) real, allocatable :: gradient(:) ! matmul(w, db) real, allocatable :: dw(:,:) ! weight gradients @@ -33,9 +33,9 @@ module nf_rnn_layer contains - !procedure :: backward + procedure :: backward procedure :: forward - !procedure :: get_gradients + procedure :: get_gradients procedure :: get_num_params procedure :: get_params procedure :: init @@ -46,14 +46,14 @@ module nf_rnn_layer interface rnn_layer elemental module function rnn_layer_cons(output_size, activation) & result(res) - !! This function returns the `dense_layer` instance. + !! This function returns the `rnn_layer` instance. integer, intent(in) :: output_size !! Number of neurons in this layer class(activation_function), intent(in) :: activation !! Instance of the activation_function to use; !! See nf_activation.f90 for available functions. type(rnn_layer) :: res - !! dense_layer instance + !! rnn_layer instance end function rnn_layer_cons end interface rnn_layer @@ -74,7 +74,7 @@ end subroutine backward pure module subroutine forward(self, input) !! Propagate forward the layer. !! Calling this subroutine updates the values of a few data components - !! of `dense_layer` that are needed for the backward pass. + !! of `rnn_layer` that are needed for the backward pass. class(rnn_layer), intent(in out) :: self !! Dense layer instance real, intent(in) :: input(:) From 7415081863800ee1d0c87d9255a9bf1044c1bbf7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 28 Oct 2023 16:40:58 -0600 Subject: [PATCH 11/31] Constructor for RNN --- src/nf/nf_layer_constructors.f90 | 23 ++++++++++++++++++++ src/nf/nf_layer_constructors_submodule.f90 | 25 +++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index a46daa37..956ffac5 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -166,6 +166,29 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape + pure module function rnn(layer_size, activation) result(res) + !! Recurrent (fully-connected) layer constructor. + !! + !! This layer is a building block for recurrent, fully-connected + !! networks, or for an output layer of a convolutional network. + !! A recurrent layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only :: rnn, layer, relu + !! type(layer) :: rnn_layer + !! rnn_layer = rnn(10) + !! rnn_layer = rnn(10, activation=relu()) + !! ``` + integer, intent(in) :: layer_size + !! The number of neurons in a dense layer + class(activation_function), intent(in), optional :: activation + !! Activation function instance (default tanh) + type(layer) :: res + !! 
Resulting layer instance + end function rnn + end interface end module nf_layer_constructors diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 606ae39e..9c450a2a 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -9,7 +9,7 @@ use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer use nf_rnn_layer, only: rnn_layer - use nf_activation, only: activation_function, relu, sigmoid + use nf_activation, only: activation_function, relu, sigmoid, tanhf implicit none @@ -135,4 +135,27 @@ module function reshape(output_shape) result(res) end function reshape + pure module function rnn(layer_size, activation) result(res) + integer, intent(in) :: layer_size + class(activation_function), intent(in), optional :: activation + type(layer) :: res + + class(activation_function), allocatable :: activation_tmp + + res % name = 'rnn' + res % layer_shape = [layer_size] + + if (present(activation)) then + allocate(activation_tmp, source=activation) + else + allocate(activation_tmp, source=tanhf()) + end if + + res % activation = activation_tmp % get_name() + + allocate(res % p, source=rnn_layer(layer_size, activation_tmp)) + + end function rnn + + end submodule nf_layer_constructors_submodule From 6f5686336eae76237ac43ba9c2e9f068a9ddcce1 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 28 Oct 2023 16:51:50 -0600 Subject: [PATCH 12/31] Loading rnn constructor in the root --- src/nf.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf.f90 b/src/nf.f90 index b97d9e62..670f815e 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape + conv2d, dense, flatten, input, maxpool2d, reshape, rnn use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network From ad598a8b38374a0cde0774956a918696761a2b7c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 10:16:22 -0600 Subject: [PATCH 13/31] Back to 1D concept I'll try with 1D with a state memory and the option to reset state for processing a new time series. --- src/nf/nf_rnn_layer.f90 | 2 +- src/nf/nf_rnn_layer_submodule.f90 | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index 36597ca0..94b6fdaf 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -20,7 +20,7 @@ module nf_rnn_layer integer :: output_size real, allocatable :: weights(:,:) - real, allocatable :: recurrent(:,:) + real, allocatable :: recurrent(:) real, allocatable :: biases(:) real, allocatable :: z(:) ! matmul(x, w) + b real, allocatable :: state(:) diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index 7d047e64..eef8d2e0 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -134,17 +134,13 @@ module subroutine init(self, input_shape) call random_normal(self % weights) self % weights = self % weights / self % input_size - ! Recurrent weights are a 2-d square array of shape this layer size. - ! Each neuron is adjusted by each state times a recurrent weight. - allocate(self % recurrent(self % output_size, self % output_size)) - call random_normal(self % recurrent) - self % recurrent = self % recurrent / self % output_size - ! 
Broadcast weights to all other images, if any. call co_broadcast(self % weights, 1) - allocate(self % state(self % output_size)) - self % state = 0 + allocate(self % recurrent(self % output_size)) + call random_normal(self % recurrent) + self % recurrent = self % recurrent / self % input_size + allocate(self % biases(self % output_size)) self % biases = 0 @@ -155,6 +151,9 @@ module subroutine init(self, input_shape) allocate(self % z(self % output_size)) self % z = 0 + allocate(self % state(self % output_size)) + self % state = 0 + allocate(self % dw(self % input_size, self % output_size)) self % dw = 0 From 0ae7af137cd5ba4014609387360b3769ffcfb4c7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 13:58:28 -0600 Subject: [PATCH 14/31] fix: Recurrent is actually a square matrix Each neuron is affected by all states. With this change the forward procedure is working correctly. I verified a couple of test cases. --- src/nf/nf_rnn_layer.f90 | 2 +- src/nf/nf_rnn_layer_submodule.f90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index 94b6fdaf..36597ca0 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -20,7 +20,7 @@ module nf_rnn_layer integer :: output_size real, allocatable :: weights(:,:) - real, allocatable :: recurrent(:) + real, allocatable :: recurrent(:,:) real, allocatable :: biases(:) real, allocatable :: z(:) ! matmul(x, w) + b real, allocatable :: state(:) diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index eef8d2e0..aa9b1ce0 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -137,7 +137,7 @@ module subroutine init(self, input_shape) ! Broadcast weights to all other images, if any. call co_broadcast(self % weights, 1) - allocate(self % recurrent(self % output_size)) + allocate(self % recurrent(self % output_size, self % output_size)) call random_normal(self % recurrent) self % recurrent = self % recurrent / self % input_size From c1649243c39615317f2cbb97f0ad0e41d436a20e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 18:09:28 -0600 Subject: [PATCH 15/31] Apply loss function if RNN is the output layer --- src/nf/nf_network_submodule.f90 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 8b618bce..ff3758c8 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -333,6 +333,11 @@ pure module subroutine backward(self, output, loss) self % layers(n - 1), & self % loss % derivative(output, this_layer % output) & ) + type is(rnn_layer) + call self % layers(n) % backward( & + self % layers(n - 1), & + quadratic_derivative(output, this_layer % output) & + ) end select else ! Hidden layer; take the gradient from the next layer From 55ad96d6eec4281fa3641285e200bdb606429b08 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 18:57:21 -0600 Subject: [PATCH 16/31] fix: Getting biases --- src/nf/nf_rnn_layer_submodule.f90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index aa9b1ce0..39528a7a 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -114,8 +114,9 @@ module subroutine set_params(self, params) ! 
reshape the biases first = last + 1 + last = first + self % output_size self % biases = reshape( & - params(first:), & + params(first:last), & [self % output_size] & ) From b345865c11651c104d90528b9b4547fbc7fb2fde Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 19:00:39 -0600 Subject: [PATCH 17/31] Allowing backward 1D from dense to RNN --- src/nf/nf_layer_submodule.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 43ea84c1..6cb1862c 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -33,6 +33,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(flatten_layer) call this_layer % backward(prev_layer % output, gradient) + type is(rnn_layer) + call this_layer % backward(prev_layer % output, gradient) end select type is(flatten_layer) From 91b85e04e3ff39e6ca6ae53b9d19765b3af6398c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 19:02:10 -0600 Subject: [PATCH 18/31] Allowing backward 1D from RNN --- src/nf/nf_layer_submodule.f90 | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 6cb1862c..bc7d803f 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -49,6 +49,19 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select + type is(rnn_layer) + + select type(prev_layer => previous % p) + type is(input1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(dense_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(flatten_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(rnn_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + end select end subroutine backward_1d From 5e197f00bb2337650c21b26ad79118c327cab9cc Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 19:03:08 -0600 Subject: [PATCH 19/31] Allowing forward from dense to RNN --- src/nf/nf_layer_submodule.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index bc7d803f..4f7395fa 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -139,6 +139,8 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(flatten_layer) call this_layer % forward(prev_layer % output) + type is(rnn_layer) + call this_layer % forward(prev_layer % output) end select type is(conv2d_layer) From 7f671c80df2003ea9c41d2423438d248407b8aab Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 19:04:50 -0600 Subject: [PATCH 20/31] Allowing forward from RNN --- src/nf/nf_layer_submodule.f90 | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 4f7395fa..688f7e7d 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -197,6 +197,19 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select + type is(rnn_layer) + + ! 
Upstream layers permitted: input1d, dense, rnn + select type(prev_layer => input % p) + type is(input1d_layer) + call this_layer % forward(prev_layer % output) + type is(dense_layer) + call this_layer % forward(prev_layer % output) + type is(rnn_layer) + call this_layer % forward(prev_layer % output) + end select + + end select end subroutine forward From c27f59cb3fc718314e353ca52add371750b0a571 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 31 Oct 2023 19:05:39 -0600 Subject: [PATCH 21/31] Getting output from RNN --- src/nf/nf_layer_submodule.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 688f7e7d..9b8f4f8e 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -228,6 +228,8 @@ pure module subroutine get_output_1d(self, output) allocate(output, source=this_layer % output) type is(flatten_layer) allocate(output, source=this_layer % output) + type is(rnn_layer) + allocate(output, source=this_layer % output) class default error stop '1-d output can only be read from an input1d, dense, or flatten layer.' From 524d2c44c7a175811eb38e1f478cdd58d6ba676e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Wed, 1 Nov 2023 13:20:03 -0600 Subject: [PATCH 22/31] feat: Implementing reset state for RNN --- src/nf/nf_layer.f90 | 5 +++++ src/nf/nf_layer_submodule.f90 | 10 ++++++++++ src/nf/nf_network.f90 | 8 ++++++++ src/nf/nf_network_submodule.f90 | 14 ++++++++++++++ src/nf/nf_rnn_layer.f90 | 9 +++++++++ src/nf/nf_rnn_layer_submodule.f90 | 7 +++++++ 6 files changed, 53 insertions(+) diff --git a/src/nf/nf_layer.f90 b/src/nf/nf_layer.f90 index ca5e9606..9d3d5610 100644 --- a/src/nf/nf_layer.f90 +++ b/src/nf/nf_layer.f90 @@ -32,6 +32,7 @@ module nf_layer procedure :: set_params procedure :: init procedure :: print_info + procedure :: reset ! Specific subroutines for different array ranks procedure, private :: backward_1d @@ -153,6 +154,10 @@ module subroutine set_params(self, params) !! Parameters of this layer end subroutine set_params + module subroutine reset(self) + class(layer), intent(in out) :: self + end subroutine reset + end interface end module nf_layer diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 9b8f4f8e..ac13a2c0 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -442,4 +442,14 @@ module subroutine set_params(self, params) end subroutine set_params + module subroutine reset(self) + class(layer), intent(in out) :: self + + select type (this_layer => self % p) + type is (rnn_layer) + call this_layer % reset() + end select + + end subroutine reset + end submodule nf_layer_submodule diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index 8afa8884..75a73604 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -26,6 +26,7 @@ module nf_network procedure :: get_params procedure :: print_info procedure :: set_params + procedure :: reset procedure :: train procedure :: update @@ -242,6 +243,13 @@ module subroutine update(self, optimizer, batch_size) !! Set to `size(input_data, dim=2)` for a batch gradient descent. end subroutine update + module subroutine reset(self) + !! Reset network state + !! + !! 
Currently only affect RNN layer type + class(network), intent(in out) :: self + end subroutine reset + end interface end module nf_network diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index ff3758c8..d949412c 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -738,4 +738,18 @@ module subroutine update(self, optimizer, batch_size) end subroutine update + module subroutine reset(self) + class(network), intent(in out) :: self + integer :: n, num_layers + + num_layers = size(self % layers) + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(rnn_layer) + call self % layers(n) % reset() + end select + end do + + end subroutine reset + end submodule nf_network_submodule diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index 36597ca0..dbf82224 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -40,6 +40,7 @@ module nf_rnn_layer procedure :: get_params procedure :: init procedure :: set_params + procedure :: reset end type rnn_layer @@ -126,6 +127,14 @@ module subroutine init(self, input_shape) !! Shape of the input layer end subroutine init + module subroutine reset(self) + !! Reset layer state + !! + !! Currently reset state to zero but might be worth reconsidering it + !! in the future. + class(rnn_layer), intent(in out) :: self + end subroutine reset + end interface end module nf_rnn_layer diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90 index 39528a7a..fe2992aa 100644 --- a/src/nf/nf_rnn_layer_submodule.f90 +++ b/src/nf/nf_rnn_layer_submodule.f90 @@ -166,4 +166,11 @@ module subroutine init(self, input_shape) end subroutine init + module subroutine reset(self) + class(rnn_layer), intent(in out) :: self + + self % state = 0 + + end subroutine reset + end submodule nf_rnn_layer_submodule From 598f9e7b406ae8491803a98003b5ec3640927b81 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 5 Nov 2023 19:30:04 -0700 Subject: [PATCH 23/31] refactor: set_state() on layer level Instead of reset on network level. --- src/nf/nf_layer.f90 | 9 +++++---- src/nf/nf_layer_submodule.f90 | 16 ++++++++++------ src/nf/nf_network.f90 | 8 -------- src/nf/nf_network_submodule.f90 | 17 +++-------------- src/nf/nf_rnn_layer.f90 | 16 +++++++++++++--- 5 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/nf/nf_layer.f90 b/src/nf/nf_layer.f90 index 9d3d5610..5bcac6b7 100644 --- a/src/nf/nf_layer.f90 +++ b/src/nf/nf_layer.f90 @@ -30,9 +30,9 @@ module nf_layer procedure :: get_params procedure :: get_gradients procedure :: set_params + procedure :: set_state procedure :: init procedure :: print_info - procedure :: reset ! Specific subroutines for different array ranks procedure, private :: backward_1d @@ -154,9 +154,10 @@ module subroutine set_params(self, params) !! 
Parameters of this layer end subroutine set_params - module subroutine reset(self) - class(layer), intent(in out) :: self - end subroutine reset + module subroutine set_state(self, state) + class(layer), intent(inout) :: self + real, intent(in), optional :: state(:) + end subroutine set_state end interface diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ac13a2c0..09907a95 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -442,14 +442,18 @@ module subroutine set_params(self, params) end subroutine set_params - module subroutine reset(self) - class(layer), intent(in out) :: self + module subroutine set_state(self, state) + class(layer), intent(inout) :: self + real, intent(in), optional :: state(:) select type (this_layer => self % p) type is (rnn_layer) - call this_layer % reset() - end select - - end subroutine reset + if (present(state)) then + this_layer % state = state + else + this_layer % state = 0 + end if + end select + end subroutine set_state end submodule nf_layer_submodule diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index 75a73604..8afa8884 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -26,7 +26,6 @@ module nf_network procedure :: get_params procedure :: print_info procedure :: set_params - procedure :: reset procedure :: train procedure :: update @@ -243,13 +242,6 @@ module subroutine update(self, optimizer, batch_size) !! Set to `size(input_data, dim=2)` for a batch gradient descent. end subroutine update - module subroutine reset(self) - !! Reset network state - !! - !! Currently only affect RNN layer type - class(network), intent(in out) :: self - end subroutine reset - end interface end module nf_network diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index d949412c..c0222c60 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -733,23 +733,12 @@ module subroutine update(self, optimizer, batch_size) type is(conv2d_layer) this_layer % dw = 0 this_layer % db = 0 - end select - end do - - end subroutine update - - module subroutine reset(self) - class(network), intent(in out) :: self - integer :: n, num_layers - - num_layers = size(self % layers) - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) type is(rnn_layer) - call self % layers(n) % reset() + this_layer % dw = 0 + this_layer % db = 0 end select end do - end subroutine reset + end subroutine update end submodule nf_network_submodule diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index dbf82224..efdfa899 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -14,7 +14,7 @@ module nf_rnn_layer type, extends(base_layer) :: rnn_layer - !! Concrete implementation of a dense (fully-connected) layer type + !! Concrete implementation of an RNN (fully-connected) layer type integer :: input_size integer :: output_size @@ -40,7 +40,7 @@ module nf_rnn_layer procedure :: get_params procedure :: init procedure :: set_params - procedure :: reset + procedure :: set_state end type rnn_layer @@ -94,7 +94,7 @@ pure module function get_params(self) result(params) !! Return the parameters (weights and biases) of this layer. !! The parameters are ordered as weights first, biases second. class(rnn_layer), intent(in) :: self - !! Dense layer instance + !! RNN layer instance real, allocatable :: params(:) !! 
Parameters of this layer end function get_params @@ -137,4 +137,14 @@ end subroutine reset end interface + subroutine set_state(self, state) + type(rnn_layer), intent(inout) :: self + real, intent(in), optional :: state(:) + if (present(state)) then + self % state = state + else + self % state = 0 + end if + end subroutine set_state + end module nf_rnn_layer From b7bead66d20b32230d12223a729841c6da78540a Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 14 Nov 2023 09:21:42 -0700 Subject: [PATCH 24/31] wip: A simple RNN example --- example/simple_rnn.f90 | 48 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 example/simple_rnn.f90 diff --git a/example/simple_rnn.f90 b/example/simple_rnn.f90 new file mode 100644 index 00000000..dcb1db7f --- /dev/null +++ b/example/simple_rnn.f90 @@ -0,0 +1,48 @@ +program simple_rnn + use nf, only: dense, input, network, rnn, sgd + implicit none + type(network) :: net + real, allocatable :: x(:), y(:), p(:) + integer, parameter :: num_iterations = 1000 + integer :: n, l + + allocate(p(2)) + + print '("Simple RNN")' + print '(60("="))' + + net = network([ & + input(3), & + rnn(5), & + rnn(1) & + ]) + + call net % print_info() + + x = [0.2, 0.4, 0.6] + y = [0.123456, 0.246802] + + do n = 0, num_iterations + + do l = 1, size(net % layers) + if (net % layers(l) % name == 'rnn') call net % layers(l) % set_state() + end do + + if (mod(n, 100) == 0) then + p(1:1) = net % predict(x) + p(2:2) = net % predict(x) + print '(i4,2(3x,f8.6))', n, p + + else + + call net % forward(x) + call net % backward(y(1:1)) + call net % update(optimizer=sgd(learning_rate=.001)) + call net % forward(x) + call net % backward(y(2:2)) + call net % update(optimizer=sgd(learning_rate=.001)) + end if + + end do + +end program simple_rnn From 088e4f3ab23127ed73d51087028e18740b508e05 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 14 Nov 2023 09:22:42 -0700 Subject: [PATCH 25/31] feat: layer getting gradient from RNN --- src/nf/nf_layer_submodule.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 09907a95..e982d670 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -379,6 +379,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. + type is (rnn_layer) + gradients = this_layer % get_gradients() class default error stop 'get_gradients() with unknown layer type.' end select From 4d0a4fd3574a01e59fb0f12b4fc4eb66560f077a Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 14 Nov 2023 09:23:22 -0700 Subject: [PATCH 26/31] feat: layer setting params for RNN --- src/nf/nf_layer_submodule.f90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index e982d670..2b06a338 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -438,6 +438,9 @@ module subroutine set_params(self, params) write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' + type is (rnn_layer) + call this_layer % set_params(params) + class default error stop 'set_params() with unknown layer type.' 
end select From ee516a88e9225b6dc0701c782a2d530e09e1aaf8 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 14 Nov 2023 10:12:28 -0700 Subject: [PATCH 27/31] Might not use set_state at rnn_layer level --- src/nf/nf_rnn_layer.f90 | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90 index efdfa899..a4f55971 100644 --- a/src/nf/nf_rnn_layer.f90 +++ b/src/nf/nf_rnn_layer.f90 @@ -40,7 +40,7 @@ module nf_rnn_layer procedure :: get_params procedure :: init procedure :: set_params - procedure :: set_state + ! procedure :: set_state end type rnn_layer @@ -127,24 +127,11 @@ module subroutine init(self, input_shape) !! Shape of the input layer end subroutine init - module subroutine reset(self) - !! Reset layer state - !! - !! Currently reset state to zero but might be worth reconsidering it - !! in the future. - class(rnn_layer), intent(in out) :: self - end subroutine reset + !module subroutine set_state(self, state) + ! type(rnn_layer), intent(inout) :: self + ! real, intent(in), optional :: state(:) + !end subroutine set_state end interface - subroutine set_state(self, state) - type(rnn_layer), intent(inout) :: self - real, intent(in), optional :: state(:) - if (present(state)) then - self % state = state - else - self % state = 0 - end if - end subroutine set_state - end module nf_rnn_layer From 07f75872b67320a9a350598a3dd28c9c0cb537cc Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 21:38:58 -0600 Subject: [PATCH 28/31] fix: New access point to 'loss % derivative' Previously `quadratic_derivative`. --- src/nf/nf_network_submodule.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index c0222c60..3414d89f 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -336,7 +336,7 @@ pure module subroutine backward(self, output, loss) type is(rnn_layer) call self % layers(n) % backward( & self % layers(n - 1), & - quadratic_derivative(output, this_layer % output) & + self % loss % derivative(output, this_layer % output) & ) end select else From 9b22826342cdf8a50310eaeb7c863dcd73c64a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Castel=C3=A3o?= Date: Sun, 30 Jun 2024 15:40:26 -0600 Subject: [PATCH 29/31] Define set_state as pure Co-authored-by: Jeremie Vandenplas --- src/nf/nf_layer_submodule.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 2b06a338..7fac8c2b 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -447,7 +447,7 @@ module subroutine set_params(self, params) end subroutine set_params - module subroutine set_state(self, state) + pure module subroutine set_state(self, state) class(layer), intent(inout) :: self real, intent(in), optional :: state(:) From 5bc9bc5bc5ce4c8491b639b3435f8f28f86df83d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 30 Jun 2024 17:06:21 -0600 Subject: [PATCH 30/31] fix: pure interface for set_state --- src/nf/nf_layer.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf/nf_layer.f90 b/src/nf/nf_layer.f90 index 5bcac6b7..487e112e 100644 --- a/src/nf/nf_layer.f90 +++ b/src/nf/nf_layer.f90 @@ -154,7 +154,7 @@ module subroutine set_params(self, params) !! 
Parameters of this layer end subroutine set_params - module subroutine set_state(self, state) + pure module subroutine set_state(self, state) class(layer), intent(inout) :: self real, intent(in), optional :: state(:) end subroutine set_state From 4c7c0b9991ec306d2055f6dcc9d1cf7e6803548d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 21 Oct 2024 00:01:06 -0600 Subject: [PATCH 31/31] fix: Conciliating with latest main state --- src/nf/nf_network_submodule.f90 | 153 +------------------------------- 1 file changed, 1 insertion(+), 152 deletions(-) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 3414d89f..1bc03773 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -8,8 +8,6 @@ use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer use nf_rnn_layer, only: rnn_layer - use nf_io_hdf5, only: get_hdf5_dataset - use nf_keras, only: get_keras_h5_layers, keras_layer use nf_layer, only: layer use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape, rnn use nf_loss, only: quadratic @@ -96,155 +94,6 @@ module function network_from_layers(layers) result(res) end function network_from_layers - module function network_from_keras(filename) result(res) - character(*), intent(in) :: filename - type(network) :: res - type(keras_layer), allocatable :: keras_layers(:) - type(layer), allocatable :: layers(:) - character(:), allocatable :: layer_name - character(:), allocatable :: object_name - integer :: n - - keras_layers = get_keras_h5_layers(filename) - - allocate(layers(size(keras_layers))) - - do n = 1, size(layers) - - select case(keras_layers(n) % class) - - case('Conv2D') - - if (keras_layers(n) % kernel_size(1) & - /= keras_layers(n) % kernel_size(2)) & - error stop 'Non-square kernel in conv2d layer not supported.' - - layers(n) = conv2d( & - keras_layers(n) % filters, & - !FIXME add support for non-square kernel - keras_layers(n) % kernel_size(1), & - get_activation_by_name(keras_layers(n) % activation) & - ) - - case('Dense') - - layers(n) = dense( & - keras_layers(n) % units(1), & - get_activation_by_name(keras_layers(n) % activation) & - ) - - case('Flatten') - layers(n) = flatten() - - case('InputLayer') - if (size(keras_layers(n) % units) == 1) then - ! input1d - layers(n) = input(keras_layers(n) % units(1)) - else - ! input3d - layers(n) = input(keras_layers(n) % units) - end if - - case('MaxPooling2D') - - if (keras_layers(n) % pool_size(1) & - /= keras_layers(n) % pool_size(2)) & - error stop 'Non-square pool in maxpool2d layer not supported.' - - if (keras_layers(n) % strides(1) & - /= keras_layers(n) % strides(2)) & - error stop 'Unequal strides in maxpool2d layer are not supported.' - - layers(n) = maxpool2d( & - !FIXME add support for non-square pool and stride - keras_layers(n) % pool_size(1), & - keras_layers(n) % strides(1) & - ) - - case('Reshape') - layers(n) = reshape(keras_layers(n) % target_shape) - - case default - error stop 'This Keras layer is not supported' - - end select - - end do - - res = network(layers) - - ! Loop over layers and read weights and biases from the Keras h5 file - ! for each; currently only dense layers are implemented. - do n = 2, size(res % layers) - - layer_name = keras_layers(n) % name - - select type(this_layer => res % layers(n) % p) - - type is(conv2d_layer) - ! 
Read biases from file - object_name = '/model_weights/' // layer_name // '/' & - // layer_name // '/bias:0' - call get_hdf5_dataset(filename, object_name, this_layer % biases) - - ! Read weights from file - object_name = '/model_weights/' // layer_name // '/' & - // layer_name // '/kernel:0' - call get_hdf5_dataset(filename, object_name, this_layer % kernel) - - type is(dense_layer) - - ! Read biases from file - object_name = '/model_weights/' // layer_name // '/' & - // layer_name // '/bias:0' - call get_hdf5_dataset(filename, object_name, this_layer % biases) - - ! Read weights from file - object_name = '/model_weights/' // layer_name // '/' & - // layer_name // '/kernel:0' - call get_hdf5_dataset(filename, object_name, this_layer % weights) - - type is(flatten_layer) - ! Nothing to do - continue - - type is(maxpool2d_layer) - ! Nothing to do - continue - - type is(reshape3d_layer) - ! Nothing to do - continue - - type is(rnn_layer) - - ! Read biases from file - object_name = '/model_weights/' // layer_name // '/' & - // layer_name // '/simple_rnn_cell_23/bias:0' - call get_hdf5_dataset(filename, object_name, this_layer % biases) - - ! Read weights from file - object_name = '/model_weights/' // layer_name // '/' & - // layer_name // '/simple_rnn_cell_23/kernel:0' - call get_hdf5_dataset(filename, object_name, this_layer % weights) - - ! Read recurrent weights from file - object_name = '/model_weights/' // layer_name // '/' & - // layer_name // '/simple_rnn_cell_23/recurrent_kernel:0' - call get_hdf5_dataset(filename, object_name, this_layer % recurrent) - - class default - error stop 'Internal error in network_from_keras(); ' & - // 'mismatch in layer types between the Keras and ' & - // 'neural-fortran model layers.' - - end select - - end do - - end function network_from_keras - - pure function get_activation_by_name(activation_name) result(res) ! Workaround to get activation_function with some ! hardcoded default parameters by its name. @@ -298,7 +147,7 @@ pure function get_activation_by_name(activation_name) result(res) end function get_activation_by_name - pure module subroutine backward(self, output, loss) + module subroutine backward(self, output, loss) class(network), intent(in out) :: self real, intent(in) :: output(:) class(loss_type), intent(in), optional :: loss
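
An illustrative forward-step sketch (not part of the patch series above): the recurrent update that nf_rnn_layer_submodule.f90 implements is z = matmul(input, weights) + matmul(state, recurrent) + biases, followed by state = activation(z), with the state carried between calls and cleared via set_state(). The toy program below assumes hypothetical sizes (3 inputs, 2 neurons), fixed coefficients in place of random_normal(), and the tanh default that the rnn() constructor falls back to; it sketches the computation only and is not the library API.

! Sketch only: toy recurrent forward step mirroring the patched forward().
program rnn_forward_sketch
  implicit none
  integer, parameter :: n_in = 3, n_out = 2
  real :: w(n_in, n_out)    ! input weights, shaped as in init()
  real :: r(n_out, n_out)   ! square recurrent weights: every state feeds every neuron
  real :: b(n_out), state(n_out), z(n_out), x(n_in)
  integer :: t

  w = 0.1; r = 0.05; b = 0. ! fixed toy values; the real layer draws them from random_normal()
  state = 0.                ! same effect as set_state() with no argument
  x = [0.2, 0.4, 0.6]

  do t = 1, 3
    z = matmul(x, w) + matmul(state, r) + b
    state = tanh(z)         ! the state doubles as the layer output
    print '(a,i0,a,2f10.6)', 'step ', t, ': ', state
  end do
end program rnn_forward_sketch

The square shape of the recurrent matrix is the point of patch 14 in this series: each neuron's pre-activation mixes all previous states, which a rank-1 recurrent array cannot express.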