   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_layernorm_layer, only: layernorm_layer
   use nf_optimizers, only: optimizer_base_type

 contains
@@ -46,7 +47,7 @@ pure module subroutine backward_1d(self, previous, gradient)

       type is(flatten_layer)

-        ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d
+        ! Upstream layers permitted: input2d, input3d, conv2d, layernorm, maxpool2d
         select type(prev_layer => previous % p)
           type is(input2d_layer)
             call this_layer % backward(prev_layer % output, gradient)
@@ -60,6 +61,8 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

     end select
@@ -84,6 +87,8 @@ pure module subroutine backward_2d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

       type is(self_attention_layer)
@@ -95,8 +100,18 @@ pure module subroutine backward_2d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(layernorm_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select

+      type is(layernorm_layer)
+
+        select type(prev_layer => previous % p)
+          type is(linear2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(self_attention_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+        end select
     end select

   end subroutine backward_2d
@@ -234,6 +249,8 @@ module subroutine forward(self, input)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
         end select

       type is(reshape3d_layer)
@@ -250,26 +267,40 @@ module subroutine forward(self, input)

       type is(linear2d_layer)

-        ! Upstream layers permitted: input2d, linear2d
+        ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
         select type(prev_layer => input % p)
           type is(input2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
             call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
         end select

       type is(self_attention_layer)

-        ! Upstream layers permitted: input2d, linear2d
+        ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
         select type(prev_layer => input % p)
           type is(input2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
             call this_layer % forward(prev_layer % output)
+          type is(layernorm_layer)
+            call this_layer % forward(prev_layer % output)
+        end select
+
+      type is(layernorm_layer)
+
+        ! Upstream layers permitted: linear2d, self_attention
+        select type(prev_layer => input % p)
+          type is(linear2d_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(self_attention_layer)
+            call this_layer % forward(prev_layer % output)
         end select

     end select
@@ -311,6 +342,8 @@ pure module subroutine get_output_2d(self, output)
         allocate(output, source=this_layer % output)
       type is(self_attention_layer)
         allocate(output, source=this_layer % output)
+      type is(layernorm_layer)
+        allocate(output, source=this_layer % output)
       class default
         error stop '2-d output can only be read from an input2d or linear2d layer.'

@@ -354,8 +387,8 @@ impure elemental module subroutine init(self, input)
       call this_layer % init(input % layer_shape)
     end select

-    ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or
-    ! self_attention layers is not known until we receive an input layer.
+    ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d,
+    ! self_attention or layernorm layers is not known until we receive an input layer.
     select type(this_layer => self % p)
       type is(conv2d_layer)
         self % layer_shape = shape(this_layer % output)
@@ -367,6 +400,8 @@ impure elemental module subroutine init(self, input)
         self % layer_shape = shape(this_layer % output)
       type is(self_attention_layer)
         self % layer_shape = shape(this_layer % output)
+      type is(layernorm_layer)
+        self % layer_shape = shape(this_layer % output)
       type is(maxpool2d_layer)
         self % layer_shape = shape(this_layer % output)
     end select
@@ -425,6 +460,8 @@ elemental module function get_num_params(self) result(num_params)
         num_params = this_layer % get_num_params()
       type is(self_attention_layer)
         num_params = this_layer % get_num_params()
+      type is(layernorm_layer)
+        num_params = this_layer % get_num_params()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -458,6 +495,8 @@ module function get_params(self) result(params)
         params = this_layer % get_params()
       type is(self_attention_layer)
         params = this_layer % get_params()
+      type is(layernorm_layer)
+        params = this_layer % get_params()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -491,6 +530,8 @@ module function get_gradients(self) result(gradients)
         gradients = this_layer % get_gradients()
       type is(self_attention_layer)
         gradients = this_layer % get_gradients()
+      type is(layernorm_layer)
+        gradients = this_layer % get_gradients()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -549,6 +590,9 @@ module subroutine set_params(self, params)
       type is(self_attention_layer)
         call this_layer % set_params(params)

+      type is(layernorm_layer)
+        call this_layer % set_params(params)
+
       type is(maxpool2d_layer)
         ! No parameters to set.
         write(stderr, '(a)') 'Warning: calling set_params() ' &
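Note on the pattern: every hunk above adds a layernorm case to an existing select type dispatch on the polymorphic layer % p component. The following is a minimal, self-contained sketch of that dispatch outside the library; the names base_layer, linear2d_like, layernorm_like, layer_container, and demo_get_output_2d are illustrative stand-ins, not neural-fortran's actual types or procedures.

! Illustrative sketch only; type and procedure names are stand-ins,
! not neural-fortran's actual API.
module demo_layers
  implicit none

  type, abstract :: base_layer
  end type base_layer

  type, extends(base_layer) :: linear2d_like
    real, allocatable :: output(:,:)
  end type linear2d_like

  type, extends(base_layer) :: layernorm_like
    real, allocatable :: output(:,:)
  end type layernorm_like

  type :: layer_container
    class(base_layer), allocatable :: p
  end type layer_container

contains

  subroutine demo_get_output_2d(self, output)
    ! Same shape as get_output_2d in the diff: resolve the concrete type
    ! held in self % p, copy its output, and reject anything unknown.
    type(layer_container), intent(in) :: self
    real, allocatable, intent(out) :: output(:,:)

    select type(this_layer => self % p)
      type is(linear2d_like)
        allocate(output, source=this_layer % output)
      type is(layernorm_like)
        allocate(output, source=this_layer % output)
      class default
        error stop 'Unknown layer type.'
    end select
  end subroutine demo_get_output_2d

end module demo_layers

program demo
  use demo_layers
  implicit none
  type(layer_container) :: l
  real, allocatable :: out(:,:)

  ! Store a concrete layer behind the polymorphic component, as layer % p does.
  allocate(layernorm_like :: l % p)
  select type(this_layer => l % p)
    type is(layernorm_like)
      this_layer % output = reshape([1., 2., 3., 4.], [2, 2])
  end select

  call demo_get_output_2d(l, out)
  print *, shape(out)   ! prints: 2 2
end program demo

Because each concrete layer type is matched explicitly, with only class default as a fallback, wiring in a new layer means adding a type is branch to every relevant select type block; that is why this diff touches backward_1d, backward_2d, forward, get_output_2d, init, and the parameter and gradient getters and setters for layernorm_layer.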