
Commit 78d26e4

Merge branch 'main' into locally_connected_layer
2 parents (b69ba9a + e68e6c2) · commit 78d26e4

12 files changed: +536 -21 lines

CMakeLists.txt (+2)

@@ -39,6 +39,8 @@ add_library(neural-fortran
   src/nf/nf_input3d_layer_submodule.f90
   src/nf/nf_layer_constructors.f90
   src/nf/nf_layer_constructors_submodule.f90
+  src/nf/nf_layernorm.f90
+  src/nf/nf_layernorm_submodule.f90
   src/nf/nf_layer.f90
   src/nf/nf_layer_submodule.f90
   src/nf/nf_locally_connected_1d_submodule.f90

README.md (+3 -2)

@@ -34,8 +34,9 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
 | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
-| Linear (2-d) | `linear2d` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
-| Self-attention | `self_attention` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Linear (2-d) | `linear2d` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Self-attention | `self_attention` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Layer Normalization | `layernorm` | `linear2d`, `self_attention` | 2 | ✅ | ✅ |
 | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
 | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |

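For context on what the new table rows permit, below is a minimal usage sketch that chains the layers involved. It assumes the `network([...])` constructor, the two-argument `input` form for rank-2 inputs, and `print_info` behave as in the existing neural-fortran examples; the sequence length (16), model dimension (32), and number of heads (4) are illustrative only.

```fortran
program layernorm_usage_sketch
  ! Illustrative sketch only; shapes and layer choices are arbitrary.
  use nf, only: network, input, linear2d, self_attention, layernorm

  type(network) :: net

  net = network([ &
    input(16, 32),     &  ! rank-2 input: (sequence_length, model_dimension)
    self_attention(4), &  ! self_attention may follow input2d (see table)
    layernorm(),       &  ! layernorm may follow self_attention or linear2d
    linear2d(32)       &  ! linear2d may follow layernorm
  ])

  call net % print_info()
end program layernorm_usage_sketch
```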
fpm.toml (+1 -1)

@@ -1,5 +1,5 @@
 name = "neural-fortran"
-version = "0.19.0"
+version = "0.20.0"
 license = "MIT"
 author = "Milan Curcic"
 maintainer = "[email protected]"

src/nf.f90 (+14 -2)

@@ -3,8 +3,20 @@ module nf
   use nf_datasets_mnist, only: label_digits, load_mnist
   use nf_layer, only: layer
   use nf_layer_constructors, only: &
-    conv1d, conv2d, dense, dropout, flatten, input, linear2d, locally_connected_1d, &
-    maxpool1d, maxpool2d, reshape, reshape2d, self_attention
+    conv1d, &
+    conv2d, &
+    dense, &
+    dropout, &
+    flatten, &
+    input, &
+    locally_connected_1d, &
+    linear2d, &
+    maxpool1d, &
+    maxpool2d, &
+    reshape, &
+    reshape2d, &
+    self_attention, &
+    layernorm
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network

src/nf/nf_layer_constructors.f90 (+22 -11)

@@ -16,9 +16,12 @@ module nf_layer_constructors
     flatten, &
     input, locally_connected_1d, maxpool1d, &
     linear2d, &
+    maxpool1d, &
     maxpool2d, &
-    reshape, reshape2d, &
-    self_attention
+    reshape, &
+    reshape2d, &
+    self_attention, &
+    layernorm

   interface input

@@ -310,15 +313,23 @@ module function linear2d(out_features) result(res)
       !! Resulting layer instance
     end function linear2d

-    module function self_attention(num_heads) result(res)
-      !! Rank-2 (sequence_length, out_features) self attention constructor.
-      !! sequence_length and model_dimension are determined at layer initialization, based on the
-      !! output shape of the previous layer.
-      integer, intent(in) :: num_heads
-      !! Number of attention heads
-      type(layer) :: res
-      !! Resulting layer instance
-    end function self_attention
+    module function self_attention(num_heads) result(res)
+      !! Rank-2 (sequence_length, out_features) self attention constructor.
+      !! sequence_length and model_dimension are determined at layer initialization, based on the
+      !! output shape of the previous layer.
+      integer, intent(in) :: num_heads
+      !! Number of attention heads
+      type(layer) :: res
+      !! Resulting layer instance
+    end function self_attention
+
+    module function layernorm() result(res)
+      !! Layer Normalization:
+      !! (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta
+      !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
+      !! https://arxiv.org/abs/1607.06450v1
+      type(layer) :: res
+    end function layernorm

   end interface

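As a reading aid for the `layernorm` docstring above, the expression `(x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta` is written out per element below. Here `i` indexes the sequence position, `j` the model dimension of size `d`, and ε, γ_j, β_j correspond to `eps`, `gamma`, and `beta`; treating the model dimension as the reduction axis is an assumption based on the rank-1 declarations of `gamma(:)` and `beta(:)` in `nf_layernorm.f90`.

```latex
% Layer normalization (Ba et al., 2016), written per element:
\[
  \mu_i = \frac{1}{d} \sum_{j=1}^{d} x_{ij}, \qquad
  \sigma_i^2 = \frac{1}{d} \sum_{j=1}^{d} \left( x_{ij} - \mu_i \right)^2, \qquad
  y_{ij} = \gamma_j \, \frac{x_{ij} - \mu_i}{\sqrt{\sigma_i^2 + \varepsilon}} + \beta_j
\]
```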
src/nf/nf_layer_constructors_submodule.f90 (+8)

@@ -16,6 +16,7 @@
   use nf_reshape2d_layer, only: reshape2d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_layernorm_layer, only: layernorm_layer
   use nf_activation, only: activation_function, relu, sigmoid

   implicit none

@@ -275,4 +276,11 @@ module function self_attention(num_heads) result(res)
     allocate(res % p, source=self_attention_layer(num_heads))
   end function self_attention

+  module function layernorm() result(res)
+    type(layer) :: res
+
+    res % name = 'layernorm'
+    allocate(res % p, source=layernorm_layer())
+  end function layernorm
+
 end submodule nf_layer_constructors_submodule

src/nf/nf_layer_submodule.f90 (+48 -5)

@@ -16,6 +16,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_layernorm_layer, only: layernorm_layer
   use nf_optimizers, only: optimizer_base_type

 contains

@@ -49,7 +50,6 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(gradient)

       type is(flatten_layer)
-
         ! Upstream layers permitted: input2d, input3d, conv1d, conv2d, locally_connected_1d, maxpool1d, maxpool2d
         select type(prev_layer => previous % p)
           type is(input2d_layer)

@@ -70,6 +70,8 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

     end select

@@ -94,6 +96,8 @@ pure module subroutine backward_2d(self, previous, gradient)
            call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
            call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

       type is(self_attention_layer)

@@ -105,8 +109,18 @@ pure module subroutine backward_2d(self, previous, gradient)
            call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
            call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

+      type is(layernorm_layer)
+
+        select type(prev_layer => previous % p)
+          type is(linear2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(self_attention_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+        end select
     end select

     ! Backward pass from a 2-d layer downstream currently implemented

@@ -358,6 +372,8 @@ module subroutine forward(self, input)
            call this_layer % forward(prev_layer % output)
          type is(linear2d_layer)
            call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
         end select

       type is(reshape3d_layer)

@@ -380,26 +396,40 @@ module subroutine forward(self, input)
       type is(linear2d_layer)

-        ! Upstream layers permitted: input2d, linear2d
+        ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
         select type(prev_layer => input % p)
           type is(input2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
             call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
         end select

       type is(self_attention_layer)

-        ! Upstream layers permitted: input2d, linear2d
+        ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
         select type(prev_layer => input % p)
           type is(input2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
             call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
+        end select
+
+      type is(layernorm_layer)
+
+        ! Upstream layers permitted: linear2d, self_attention
+        select type(prev_layer => input % p)
+          type is(linear2d_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(self_attention_layer)
+            call this_layer % forward(prev_layer % output)
         end select

     end select

@@ -449,6 +479,8 @@ pure module subroutine get_output_2d(self, output)
        allocate(output, source=this_layer % output)
      type is(self_attention_layer)
        allocate(output, source=this_layer % output)
+     type is(layernorm_layer)
+       allocate(output, source=this_layer % output)
      class default
        error stop '2-d output can only be read from an input2d or linear2d layer.'

@@ -492,8 +524,8 @@ impure elemental module subroutine init(self, input)
        call this_layer % init(input % layer_shape)
     end select

-    ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or
-    ! self_attention layers is not known until we receive an input layer.
+    ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d,
+    ! self_attention or layernorm layers is not known until we receive an input layer.
     select type(this_layer => self % p)
       type is(conv1d_layer)
         self % layer_shape = shape(this_layer % output)

@@ -511,6 +543,8 @@
         self % layer_shape = shape(this_layer % output)
       type is(self_attention_layer)
         self % layer_shape = shape(this_layer % output)
+      type is(layernorm_layer)
+        self % layer_shape = shape(this_layer % output)
       type is(maxpool2d_layer)
         self % layer_shape = shape(this_layer % output)
     end select

@@ -577,6 +611,8 @@ elemental module function get_num_params(self) result(num_params)
        num_params = this_layer % get_num_params()
      type is (self_attention_layer)
        num_params = this_layer % get_num_params()
+     type is (layernorm_layer)
+       num_params = this_layer % get_num_params()
      class default
        error stop 'Unknown layer type.'
    end select

@@ -618,6 +654,8 @@ module function get_params(self) result(params)
        params = this_layer % get_params()
      type is (self_attention_layer)
        params = this_layer % get_params()
+     type is (layernorm_layer)
+       params = this_layer % get_params()
      class default
        error stop 'Unknown layer type.'
    end select

@@ -659,6 +697,8 @@ module function get_gradients(self) result(gradients)
        gradients = this_layer % get_gradients()
      type is (self_attention_layer)
        gradients = this_layer % get_gradients()
+     type is (layernorm_layer)
+       gradients = this_layer % get_gradients()
      class default
        error stop 'Unknown layer type.'
    end select

@@ -728,6 +768,9 @@ module subroutine set_params(self, params)
      type is (self_attention_layer)
        call this_layer % set_params(params)

+     type is (layernorm_layer)
+       call this_layer % set_params(params)
+
      type is (maxpool2d_layer)
        ! No parameters to set.
        write(stderr, '(a)') 'Warning: calling set_params() ' &

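All of the hunks above follow one pattern: the wrapper `layer` type stores its concrete layer in a polymorphic `p` component, so it cannot call `forward`/`backward` or read `output` without first resolving the dynamic type with `select type`, once for the current layer and once for its upstream neighbour. Supporting `layernorm` therefore means adding a `type is(layernorm_layer)` branch at every such dispatch site. A condensed sketch of that pattern (not a verbatim excerpt; it mirrors the `forward` hunk for `layernorm` above):

```fortran
! Condensed sketch of the double dispatch used in nf_layer_submodule.f90.
select type(this_layer => self % p)      ! resolve the current layer's type
  type is(layernorm_layer)
    ! Upstream layers permitted: linear2d, self_attention
    select type(prev_layer => input % p) ! resolve the upstream layer's type
      type is(linear2d_layer)
        call this_layer % forward(prev_layer % output)
      type is(self_attention_layer)
        call this_layer % forward(prev_layer % output)
    end select
end select
```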
src/nf/nf_layernorm.f90 (new file, +92)

@@ -0,0 +1,92 @@
+module nf_layernorm_layer
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+
+  implicit none
+
+  private
+  public :: layernorm_layer
+
+  type, extends(base_layer) :: layernorm_layer
+    !! Layer Normalization:
+    !! (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta
+    !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
+    !! https://arxiv.org/abs/1607.06450v1
+    integer :: sequence_length
+    integer :: model_dimension
+
+    real :: eps
+    real, allocatable :: gamma(:)
+    real, allocatable :: beta(:)
+
+    real, allocatable :: d_gamma(:)
+    real, allocatable :: d_beta(:)
+    real, allocatable :: gradient(:, :)
+
+    real, allocatable :: mu(:, :)
+    real, allocatable :: sigma(:)
+
+    real, allocatable :: output(:, :)
+
+    ! temp storages
+    real, allocatable, private :: normalized(:, :)
+    real, allocatable, private :: one_over_sigma(:, :)
+    real, allocatable, private :: gradient_by_gamma_over_sigma(:, :)
+  contains
+    procedure :: forward
+    procedure :: backward
+    procedure :: init
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: get_gradients
+    procedure :: set_params
+  end type layernorm_layer
+
+  interface layernorm_layer
+    module function layernorm_layer_cons() &
+      result(res)
+      type(layernorm_layer) :: res
+    end function layernorm_layer_cons
+  end interface layernorm_layer
+
+  interface
+    pure module subroutine forward(self, input)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+      real, intent(in) :: gradient(:, :)
+    end subroutine backward
+
+    module subroutine init(self, input_shape)
+      class(layernorm_layer), intent(in out) :: self
+      integer, intent(in) :: input_shape(:)
+    end subroutine init
+
+    pure module function get_num_params(self) result(num_params)
+      class(layernorm_layer), intent(in) :: self
+      integer :: num_params
+    end function get_num_params
+
+
+    module function get_params(self) result(params)
+      class(layernorm_layer), intent(in), target :: self
+      real, allocatable :: params(:)
+    end function get_params
+
+
+    module function get_gradients(self) result(gradients)
+      class(layernorm_layer), intent(in), target :: self
+      real, allocatable :: gradients(:)
+    end function get_gradients
+
+
+    module subroutine set_params(self, params)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in), target :: params(:)
+    end subroutine set_params
+  end interface
+end module nf_layernorm_layer

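This module only declares the `layernorm_layer` type and its interfaces; the bodies live in `src/nf/nf_layernorm_submodule.f90`, which is part of this commit but not shown on this page. For orientation, here is a hedged sketch of what `forward` has to compute given the components declared above. It is not the actual submodule: the reduction over the model dimension, the biased variance, and the `spread` broadcasting are assumptions consistent with the docstring and the array ranks, and it presumes `init` has already allocated the stored fields and set `eps`.

```fortran
! Hypothetical sketch of the forward pass for layernorm_layer, written as
! it might appear in a submodule of nf_layernorm_layer. Not the actual code.
pure module subroutine forward(self, input)
  class(layernorm_layer), intent(in out) :: self
  real, intent(in) :: input(:, :)   ! (sequence_length, model_dimension)

  ! Per-position mean, stored broadcast over the model dimension to match
  ! the rank-2 declaration of mu.
  self % mu = spread( &
    sum(input, dim=2) / self % model_dimension, &
    dim=2, ncopies=self % model_dimension)

  ! Per-position standard deviation.
  self % sigma = sqrt(sum((input - self % mu)**2, dim=2) / self % model_dimension)

  ! (x - mean(x)) / sqrt(variance(x) + eps)
  self % normalized = (input - self % mu) / spread( &
    sqrt(self % sigma**2 + self % eps), dim=2, ncopies=self % model_dimension)

  ! ... * gamma + beta, with gamma and beta of length model_dimension.
  self % output = self % normalized &
    * spread(self % gamma, dim=1, ncopies=self % sequence_length) &
    + spread(self % beta, dim=1, ncopies=self % sequence_length)
end subroutine forward
```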