
Commit e68e6c2

Layernorm (#203)
* layernorm: initial implementation
* layernorm: rename source file
* layernorm: remove redundant arguments
* layernorm: remove stack allocated arrays
* layernorm: rearrange into submodule
* layernorm: add error to stop in test
* layernorm: add gradient updates
* layernorm: public api
* layernorm: update tests
* layernorm: update cmake
* layernorm: use mold for temp allocation
* layernorm: rename to layernorm
* layernorm: allow usage of layernorm at the end
* layernorm: integration test for layernorm
* layernorm: memory allocation optimization
* Tidy up
* Bump version
* Add layernorm to the table of layers

---------

Co-authored-by: milancurcic <[email protected]>
1 parent ed8b340 commit e68e6c2

12 files changed (+522, -19 lines)

CMakeLists.txt (+2)

@@ -37,6 +37,8 @@ add_library(neural-fortran
   src/nf/nf_input3d_layer_submodule.f90
   src/nf/nf_layer_constructors.f90
   src/nf/nf_layer_constructors_submodule.f90
+  src/nf/nf_layernorm.f90
+  src/nf/nf_layernorm_submodule.f90
   src/nf/nf_layer.f90
   src/nf/nf_layer_submodule.f90
   src/nf/nf_linear2d_layer.f90

README.md (+3 -2)

@@ -34,8 +34,9 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
 | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
-| Linear (2-d) | `linear2d` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
-| Self-attention | `self_attention` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Linear (2-d) | `linear2d` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Self-attention | `self_attention` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Layer Normalization | `layernorm` | `linear2d`, `self_attention` | 2 | ✅ | ✅ |
 | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
 | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |
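Per the new table rows, layernorm takes its input from linear2d or self_attention and can feed either of them (or, per the commit message, sit at the end of the network). A minimal construction sketch; the shapes (sequence length 3, model dimension 8, 2 attention heads) are illustrative and not taken from this commit:

program layernorm_usage
  use nf, only: network, input, linear2d, self_attention, layernorm
  implicit none

  type(network) :: net

  ! input(3, 8) assumes the generic input constructor builds a 2-d input
  ! layer from (sequence_length, model_dimension), as for the other 2-d layers.
  net = network([ &
    input(3, 8), &
    linear2d(8), &
    self_attention(2), &
    layernorm() &
  ])
end program layernorm_usage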

fpm.toml (+1 -1)

@@ -1,5 +1,5 @@
 name = "neural-fortran"
-version = "0.19.0"
+version = "0.20.0"
 license = "MIT"
 author = "Milan Curcic"
 maintainer = "[email protected]"

src/nf.f90 (+2 -1)

@@ -11,7 +11,8 @@ module nf
     linear2d, &
     maxpool2d, &
     reshape, &
-    self_attention
+    self_attention, &
+    layernorm
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network

src/nf/nf_layer_constructors.f90 (+19 -10)

@@ -17,7 +17,8 @@ module nf_layer_constructors
     linear2d, &
     maxpool2d, &
     reshape, &
-    self_attention
+    self_attention, &
+    layernorm

   interface input

@@ -222,15 +223,23 @@ module function linear2d(out_features) result(res)
       !! Resulting layer instance
   end function linear2d

-  module function self_attention(num_heads) result(res)
-    !! Rank-2 (sequence_length, out_features) self attention constructor.
-    !! sequence_length and model_dimension are determined at layer initialization, based on the
-    !! output shape of the previous layer.
-    integer, intent(in) :: num_heads
-      !! Number of attention heads
-    type(layer) :: res
-      !! Resulting layer instance
-  end function self_attention
+  module function self_attention(num_heads) result(res)
+    !! Rank-2 (sequence_length, out_features) self attention constructor.
+    !! sequence_length and model_dimension are determined at layer initialization, based on the
+    !! output shape of the previous layer.
+    integer, intent(in) :: num_heads
+      !! Number of attention heads
+    type(layer) :: res
+      !! Resulting layer instance
+  end function self_attention
+
+  module function layernorm() result(res)
+    !! Layer Normalization
+    !! (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta
+    !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
+    !! https://arxiv.org/abs/1607.06450v1
+    type(layer) :: res
+  end function layernorm

  end interface
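For reference, the docstring's formula written out: for a rank-2 input x of shape (sequence_length, model_dimension), and assuming (as is conventional, and consistent with the per-feature gamma and beta) that the statistics are taken over the model dimension for each sequence position s, the layer computes

\[
  y_{s,d} = \gamma_d \, \frac{x_{s,d} - \mu_s}{\sqrt{\sigma_s^2 + \varepsilon}} + \beta_d,
  \qquad
  \mu_s = \frac{1}{D} \sum_{d=1}^{D} x_{s,d},
  \qquad
  \sigma_s^2 = \frac{1}{D} \sum_{d=1}^{D} \left( x_{s,d} - \mu_s \right)^2,
\]

where D is model_dimension, gamma and beta are the trainable per-feature scale and shift, and eps guards against division by zero.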

src/nf/nf_layer_constructors_submodule.f90 (+8)

@@ -12,6 +12,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_layernorm_layer, only: layernorm_layer
   use nf_activation, only: activation_function, relu, sigmoid

   implicit none
@@ -179,4 +180,11 @@ module function self_attention(num_heads) result(res)
     allocate(res % p, source=self_attention_layer(num_heads))
   end function self_attention

+  module function layernorm() result(res)
+    type(layer) :: res
+
+    res % name = 'layernorm'
+    allocate(res % p, source=layernorm_layer())
+  end function layernorm
+
 end submodule nf_layer_constructors_submodule

src/nf/nf_layer_submodule.f90 (+49 -5)

@@ -12,6 +12,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_layernorm_layer, only: layernorm_layer
   use nf_optimizers, only: optimizer_base_type

 contains
@@ -46,7 +47,7 @@ pure module subroutine backward_1d(self, previous, gradient)

       type is(flatten_layer)

-        ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d
+        ! Upstream layers permitted: input2d, input3d, conv2d, layernorm, maxpool2d
         select type(prev_layer => previous % p)
           type is(input2d_layer)
             call this_layer % backward(prev_layer % output, gradient)
@@ -60,6 +61,8 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

     end select
@@ -84,6 +87,8 @@ pure module subroutine backward_2d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

       type is(self_attention_layer)
@@ -95,8 +100,18 @@ pure module subroutine backward_2d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

+      type is(layernorm_layer)
+
+        select type(prev_layer => previous % p)
+          type is(linear2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(self_attention_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+        end select
     end select

   end subroutine backward_2d
@@ -234,6 +249,8 @@ module subroutine forward(self, input)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
         end select

       type is(reshape3d_layer)
@@ -250,26 +267,40 @@ module subroutine forward(self, input)

       type is(linear2d_layer)

-        ! Upstream layers permitted: input2d, linear2d
+        ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
         select type(prev_layer => input % p)
           type is(input2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
             call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
         end select

       type is(self_attention_layer)

-        ! Upstream layers permitted: input2d, linear2d
+        ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
         select type(prev_layer => input % p)
          type is(input2d_layer)
            call this_layer % forward(prev_layer % output)
          type is(linear2d_layer)
            call this_layer % forward(prev_layer % output)
          type is(self_attention_layer)
            call this_layer % forward(prev_layer % output)
+         type is(layernorm_layer)
+           call this_layer % forward(prev_layer % output)
+        end select
+
+      type is(layernorm_layer)
+
+        ! Upstream layers permitted: linear2d, self_attention
+        select type(prev_layer => input % p)
+          type is(linear2d_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(self_attention_layer)
+            call this_layer % forward(prev_layer % output)
         end select

     end select
@@ -311,6 +342,8 @@ pure module subroutine get_output_2d(self, output)
         allocate(output, source=this_layer % output)
       type is(self_attention_layer)
         allocate(output, source=this_layer % output)
+      type is(layernorm_layer)
+        allocate(output, source=this_layer % output)
       class default
         error stop '2-d output can only be read from an input2d or linear2d layer.'

@@ -354,8 +387,8 @@ impure elemental module subroutine init(self, input)
         call this_layer % init(input % layer_shape)
     end select

-    ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or
-    ! self_attention layers is not known until we receive an input layer.
+    ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d,
+    ! self_attention or layernorm layers is not known until we receive an input layer.
     select type(this_layer => self % p)
       type is(conv2d_layer)
         self % layer_shape = shape(this_layer % output)
@@ -367,6 +400,8 @@ impure elemental module subroutine init(self, input)
         self % layer_shape = shape(this_layer % output)
       type is(self_attention_layer)
         self % layer_shape = shape(this_layer % output)
+      type is(layernorm_layer)
+        self % layer_shape = shape(this_layer % output)
       type is(maxpool2d_layer)
         self % layer_shape = shape(this_layer % output)
     end select
@@ -425,6 +460,8 @@ elemental module function get_num_params(self) result(num_params)
         num_params = this_layer % get_num_params()
       type is (self_attention_layer)
         num_params = this_layer % get_num_params()
+      type is (layernorm_layer)
+        num_params = this_layer % get_num_params()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -458,6 +495,8 @@ module function get_params(self) result(params)
         params = this_layer % get_params()
       type is (self_attention_layer)
         params = this_layer % get_params()
+      type is (layernorm_layer)
+        params = this_layer % get_params()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -491,6 +530,8 @@ module function get_gradients(self) result(gradients)
         gradients = this_layer % get_gradients()
       type is (self_attention_layer)
         gradients = this_layer % get_gradients()
+      type is (layernorm_layer)
+        gradients = this_layer % get_gradients()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -549,6 +590,9 @@ module subroutine set_params(self, params)
       type is (self_attention_layer)
         call this_layer % set_params(params)

+      type is (layernorm_layer)
+        call this_layer % set_params(params)
+
       type is (maxpool2d_layer)
         ! No parameters to set.
         write(stderr, '(a)') 'Warning: calling set_params() ' &
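The dispatch additions above wire layernorm into get_num_params, get_params, get_gradients, and set_params, so its gamma and beta take part in optimization like any other trainable state. A hedged, self-contained check (shapes are illustrative; it assumes the network-level get_num_params sums the per-layer counts, as it does for the existing layers):

program layernorm_param_count
  use nf, only: network, input, linear2d, layernorm
  implicit none

  type(network) :: net

  ! Illustrative shapes: sequence length 3, model dimension 8.
  net = network([input(3, 8), linear2d(8), layernorm()])

  ! layernorm holds one gamma and one beta per feature, so it should add
  ! 2 * 8 = 16 parameters on top of linear2d's weights and biases.
  print '(a, i0)', 'total trainable parameters: ', net % get_num_params()
end program layernorm_param_count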

src/nf/nf_layernorm.f90 (new file, +92)

@@ -0,0 +1,92 @@
+module nf_layernorm_layer
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+
+  implicit none
+
+  private
+  public :: layernorm_layer
+
+  type, extends(base_layer) :: layernorm_layer
+    !! Layer Normalization
+    !! (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta
+    !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
+    !! https://arxiv.org/abs/1607.06450v1
+    integer :: sequence_length
+    integer :: model_dimension
+
+    real :: eps
+    real, allocatable :: gamma(:)
+    real, allocatable :: beta(:)
+
+    real, allocatable :: d_gamma(:)
+    real, allocatable :: d_beta(:)
+    real, allocatable :: gradient(:, :)
+
+    real, allocatable :: mu(:, :)
+    real, allocatable :: sigma(:)
+
+    real, allocatable :: output(:, :)
+
+    ! temp storages
+    real, allocatable, private :: normalized(:, :)
+    real, allocatable, private :: one_over_sigma(:, :)
+    real, allocatable, private :: gradient_by_gamma_over_sigma(:, :)
+  contains
+    procedure :: forward
+    procedure :: backward
+    procedure :: init
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: get_gradients
+    procedure :: set_params
+  end type layernorm_layer
+
+  interface layernorm_layer
+    module function layernorm_layer_cons() &
+        result(res)
+      type(layernorm_layer) :: res
+    end function layernorm_layer_cons
+  end interface layernorm_layer
+
+  interface
+    pure module subroutine forward(self, input)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+      real, intent(in) :: gradient(:, :)
+    end subroutine backward
+
+    module subroutine init(self, input_shape)
+      class(layernorm_layer), intent(in out) :: self
+      integer, intent(in) :: input_shape(:)
+    end subroutine init
+
+    pure module function get_num_params(self) result(num_params)
+      class(layernorm_layer), intent(in) :: self
+      integer :: num_params
+    end function get_num_params
+
+
+    module function get_params(self) result(params)
+      class(layernorm_layer), intent(in), target :: self
+      real, allocatable :: params(:)
+    end function get_params
+
+
+    module function get_gradients(self) result(gradients)
+      class(layernorm_layer), intent(in), target :: self
+      real, allocatable :: gradients(:)
+    end function get_gradients
+
+
+    module subroutine set_params(self, params)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in), target :: params(:)
+    end subroutine set_params
+  end interface
+end module nf_layernorm_layer
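The companion implementation file, src/nf/nf_layernorm_submodule.f90, is also added by this commit but is not reproduced in this excerpt. Below is a minimal sketch of a forward pass consistent with the fields declared above; it illustrates the formula only and is not the commit's actual submodule code:

! Sketch only: names and details below are illustrative assumptions, not the
! real nf_layernorm_submodule.f90 added by this commit.
submodule(nf_layernorm_layer) nf_layernorm_submodule_sketch
  implicit none
contains

  pure module subroutine forward(self, input)
    class(layernorm_layer), intent(in out) :: self
    real, intent(in) :: input(:, :)
    integer :: i

    ! output is assumed to have been allocated in init() to
    ! shape (sequence_length, model_dimension).

    ! Per-position mean over the model dimension, broadcast back to
    ! shape (sequence_length, model_dimension) to match the stored mu(:, :).
    self % mu = spread( &
      sum(input, dim=2) / self % model_dimension, &
      dim=2, ncopies=self % model_dimension &
    )

    ! Per-position standard deviation, with eps added to the variance
    ! before taking the square root.
    self % sigma = sqrt( &
      sum((input - self % mu)**2, dim=2) / self % model_dimension + self % eps &
    )

    ! Normalize, then apply the per-feature scale (gamma) and shift (beta).
    do concurrent (i = 1:self % model_dimension)
      self % output(:, i) = &
        (input(:, i) - self % mu(:, i)) / self % sigma * self % gamma(i) + self % beta(i)
    end do
  end subroutine forward

end submodule nf_layernorm_submodule_sketch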
