# optimizers.rb
require 'numo/narray'
# Plain gradient descent: every step moves a parameter against its gradient,
# scaled by a fixed learning rate.
class SGD
  # @param lr [Numeric] learning rate multiplied into every gradient
  def initialize(lr: 0.01)
    @lr = lr
  end

  # Applies one descent step to every entry of +params+.
  #
  # Each value in +params+ is an indexable container; only its element 0 is
  # read and reassigned (element 0 becomes +old - lr * grad+).
  #
  # @param params [Hash] key => container whose element 0 is the parameter
  # @param grads [Hash] key => gradient for the parameter stored under the same key
  # @return [Array] the keys of +params+ that were processed
  def update(params:, grads:)
    names = params.keys
    names.each do |name|
      slot = params[name]
      slot[0] = slot[0] - @lr * grads[name]
    end
  end
end
# Gradient descent with momentum: a per-parameter velocity accumulates a
# decaying sum of past gradient steps and is added to the parameter.
class Momentum
  # @param lr [Numeric] learning rate multiplied into every gradient
  # @param momentum [Numeric] decay factor applied to the previous velocity
  def initialize(lr: 0.01, momentum: 0.9)
    @lr = lr
    @momentum = momentum
    @v = {}
  end

  # Applies one momentum step to every entry of +params+.
  #
  # Each value in +params+ is an indexable container; only its element 0 is
  # read and reassigned, and that element must respond to +shape+ so a
  # matching zero velocity can be allocated.
  #
  # @param params [Hash] key => container whose element 0 is the parameter
  # @param grads [Hash] key => gradient for the parameter stored under the same key
  # @return [Array] the keys of +params+ that were processed
  def update(params:, grads:)
    params.keys.each do |key|
      slot = params[key]
      # Velocity is created per key on first sight, so parameters that are
      # added after the first call start from zero instead of hitting nil.
      @v[key] ||= Numo::DFloat.zeros(slot.first.shape)
      @v[key] = @momentum * @v[key] - @lr * grads[key]
      slot[0] += @v[key]
    end
  end
end
# AdaGrad: each parameter's step is divided by the square root of the running
# sum of its squared gradients, so frequently-updated parameters slow down.
class AdaGrad
  # @param lr [Numeric] base learning rate
  def initialize(lr: 0.01)
    @lr = lr
    @h = {}
  end

  # Applies one AdaGrad step to every entry of +params+.
  #
  # Each value in +params+ is an indexable container; only its element 0 is
  # read and reassigned, and that element must respond to +shape+ so a
  # matching zero accumulator can be allocated.
  #
  # @param params [Hash] key => container whose element 0 is the parameter
  # @param grads [Hash] key => gradient for the parameter stored under the same key
  # @return [Array] the keys of +params+ that were processed
  def update(params:, grads:)
    params.keys.each do |key|
      slot = params[key]
      grad = grads[key]
      # Accumulator is created per key on first sight, so parameters that are
      # added after the first call start from zero instead of hitting nil.
      @h[key] ||= Numo::DFloat.zeros(slot.first.shape)
      @h[key] += grad * grad
      # The 1e-7 term keeps the division finite while the accumulator is zero.
      slot[0] -= @lr * grad / (Numo::DFloat::Math.sqrt(@h[key]) + 1e-7)
    end
  end
end
# Adam: keeps exponential moving averages of the gradient (@m) and of the
# squared gradient (@v) per parameter, and scales the step by a
# bias-corrected learning rate that depends on the number of updates so far.
class Adam
  # @param lr [Numeric] base learning rate
  # @param beta1 [Numeric] decay rate of the gradient average
  # @param beta2 [Numeric] decay rate of the squared-gradient average
  def initialize(lr: 0.001, beta1: 0.9, beta2: 0.999)
    @lr = lr
    @beta1 = beta1
    @beta2 = beta2
    @iter = 0
    @m = {}
    @v = {}
  end

  # Applies one Adam step to every entry of +params+.
  #
  # Each value in +params+ is an indexable container; only its element 0 is
  # read and reassigned, and that element must respond to +shape+ so matching
  # zero moment estimates can be allocated.
  #
  # @param params [Hash] key => container whose element 0 is the parameter
  # @param grads [Hash] key => gradient for the parameter stored under the same key
  # @return [Array] the keys of +params+ that were processed
  def update(params:, grads:)
    @iter += 1
    # Bias-corrected step size; shared by every parameter in this call.
    lr_t = @lr * Numo::DFloat::Math.sqrt(1.0 - @beta2**@iter) / (1.0 - @beta1**@iter)

    params.keys.each do |key|
      slot = params[key]
      grad = grads[key]
      # Moment estimates are created per key on first sight, so parameters
      # that are added after the first call start from zero instead of
      # hitting nil.
      @m[key] ||= Numo::DFloat.zeros(slot.first.shape)
      @v[key] ||= Numo::DFloat.zeros(slot.first.shape)

      @m[key] += (1 - @beta1) * (grad - @m[key])
      @v[key] += (1 - @beta2) * (grad**2 - @v[key])
      # The 1e-7 term keeps the division finite while @v is zero.
      slot[0] -= lr_t * @m[key] / (Numo::DFloat::Math.sqrt(@v[key]) + 1e-7)
    end
  end
end