16
16
use nf_reshape_layer, only: reshape3d_layer
17
17
use nf_linear2d_layer, only: linear2d_layer
18
18
use nf_self_attention_layer, only: self_attention_layer
19
+ use nf_layernorm_layer, only: layernorm_layer
19
20
use nf_optimizers, only: optimizer_base_type
20
21
21
22
contains
@@ -49,7 +50,6 @@ pure module subroutine backward_1d(self, previous, gradient)
49
50
call this_layer % backward(gradient)
50
51
51
52
type is (flatten_layer)
52
-
53
53
! Upstream layers permitted: input2d, input3d, conv1d, conv2d, locally_connected_1d, maxpool1d, maxpool2d,
! self_attention, layernorm
54
54
select type (prev_layer = > previous % p)
55
55
type is (input2d_layer)
@@ -70,6 +70,8 @@ pure module subroutine backward_1d(self, previous, gradient)
70
70
call this_layer % backward(prev_layer % output, gradient)
71
71
type is (self_attention_layer)
72
72
call this_layer % backward(prev_layer % output, gradient)
73
+ type is (layernorm_layer)
74
+ call this_layer % backward(prev_layer % output, gradient)
73
75
end select
74
76
75
77
end select
@@ -94,6 +96,8 @@ pure module subroutine backward_2d(self, previous, gradient)
94
96
call this_layer % backward(prev_layer % output, gradient)
95
97
type is (self_attention_layer)
96
98
call this_layer % backward(prev_layer % output, gradient)
99
+ type is (layernorm_layer)
100
+ call this_layer % backward(prev_layer % output, gradient)
97
101
end select
98
102
99
103
type is (self_attention_layer)
@@ -105,8 +109,18 @@ pure module subroutine backward_2d(self, previous, gradient)
105
109
call this_layer % backward(prev_layer % output, gradient)
106
110
type is (self_attention_layer)
107
111
call this_layer % backward(prev_layer % output, gradient)
112
+ type is (layernorm_layer)
113
+ call this_layer % backward(prev_layer % output, gradient)
108
114
end select
109
115
116
+ type is (layernorm_layer)
117
+
118
+ select type (prev_layer = > previous % p)
119
+ type is (linear2d_layer)
120
+ call this_layer % backward(prev_layer % output, gradient)
121
+ type is (self_attention_layer)
122
+ call this_layer % backward(prev_layer % output, gradient)
123
+ end select
110
124
end select
111
125
112
126
! Backward pass from a 2-d layer downstream currently implemented
@@ -358,6 +372,8 @@ module subroutine forward(self, input)
358
372
call this_layer % forward(prev_layer % output)
359
373
type is (linear2d_layer)
360
374
call this_layer % forward(prev_layer % output)
375
+ type is (layernorm_layer)
376
+ call this_layer % forward(prev_layer % output)
361
377
end select
362
378
363
379
type is (reshape3d_layer)
@@ -380,26 +396,40 @@ module subroutine forward(self, input)
380
396
381
397
type is (linear2d_layer)
382
398
383
- ! Upstream layers permitted: input2d, linear2d
399
+ ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
384
400
select type (prev_layer = > input % p)
385
401
type is (input2d_layer)
386
402
call this_layer % forward(prev_layer % output)
387
403
type is (linear2d_layer)
388
404
call this_layer % forward(prev_layer % output)
389
405
type is (self_attention_layer)
390
406
call this_layer % forward(prev_layer % output)
407
+ type is (layernorm_layer)
408
+ call this_layer % forward(prev_layer % output)
391
409
end select
392
410
393
411
type is (self_attention_layer)
394
412
395
- ! Upstream layers permitted: input2d, linear2d
413
+ ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
396
414
select type (prev_layer = > input % p)
397
415
type is (input2d_layer)
398
416
call this_layer % forward(prev_layer % output)
399
417
type is (linear2d_layer)
400
418
call this_layer % forward(prev_layer % output)
401
419
type is (self_attention_layer)
402
420
call this_layer % forward(prev_layer % output)
421
+ type is (layernorm_layer)
422
+ call this_layer % forward(prev_layer % output)
423
+ end select
424
+
425
+ type is (layernorm_layer)
426
+
427
+ ! Upstream layers permitted: linear2d, self_attention
428
+ select type (prev_layer = > input % p)
429
+ type is (linear2d_layer)
430
+ call this_layer % forward(prev_layer % output)
431
+ type is (self_attention_layer)
432
+ call this_layer % forward(prev_layer % output)
403
433
end select
404
434
405
435
end select
@@ -449,6 +479,8 @@ pure module subroutine get_output_2d(self, output)
449
479
allocate (output, source= this_layer % output)
450
480
type is (self_attention_layer)
451
481
allocate (output, source= this_layer % output)
482
+ type is (layernorm_layer)
483
+ allocate (output, source= this_layer % output)
452
484
class default
453
485
error stop ' 2-d output can only be read from an input2d or linear2d layer.'
454
486
@@ -492,8 +524,8 @@ impure elemental module subroutine init(self, input)
492
524
call this_layer % init(input % layer_shape)
493
525
end select
494
526
495
- ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or
496
- ! self_attention layers is not known until we receive an input layer.
527
+ ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d,
528
+ ! self_attention or layernorm layers is not known until we receive an input layer.
497
529
select type (this_layer = > self % p)
498
530
type is (conv1d_layer)
499
531
self % layer_shape = shape (this_layer % output)
@@ -511,6 +543,8 @@ impure elemental module subroutine init(self, input)
511
543
self % layer_shape = shape (this_layer % output)
512
544
type is (self_attention_layer)
513
545
self % layer_shape = shape (this_layer % output)
546
+ type is (layernorm_layer)
547
+ self % layer_shape = shape (this_layer % output)
514
548
type is (maxpool2d_layer)
515
549
self % layer_shape = shape (this_layer % output)
516
550
end select
@@ -577,6 +611,8 @@ elemental module function get_num_params(self) result(num_params)
577
611
num_params = this_layer % get_num_params()
578
612
type is (self_attention_layer)
579
613
num_params = this_layer % get_num_params()
614
+ type is (layernorm_layer)
615
+ num_params = this_layer % get_num_params()
580
616
class default
581
617
error stop ' Unknown layer type.'
582
618
end select
@@ -618,6 +654,8 @@ module function get_params(self) result(params)
618
654
params = this_layer % get_params()
619
655
type is (self_attention_layer)
620
656
params = this_layer % get_params()
657
+ type is (layernorm_layer)
658
+ params = this_layer % get_params()
621
659
class default
622
660
error stop ' Unknown layer type.'
623
661
end select
@@ -659,6 +697,8 @@ module function get_gradients(self) result(gradients)
659
697
gradients = this_layer % get_gradients()
660
698
type is (self_attention_layer)
661
699
gradients = this_layer % get_gradients()
700
+ type is (layernorm_layer)
701
+ gradients = this_layer % get_gradients()
662
702
class default
663
703
error stop ' Unknown layer type.'
664
704
end select
@@ -728,6 +768,9 @@ module subroutine set_params(self, params)
728
768
type is (self_attention_layer)
729
769
call this_layer % set_params(params)
730
770
771
+ type is (layernorm_layer)
772
+ call this_layer % set_params(params)
773
+
731
774
type is (maxpool2d_layer)
732
775
! No parameters to set.
733
776
write (stderr, ' (a)' ) ' Warning: calling set_params() ' &
0 commit comments