-
Notifications
You must be signed in to change notification settings - Fork 3
/
optim-msgd.lua
42 lines (38 loc) · 1.04 KB
/
optim-msgd.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
-- MSGD: stochastic gradient descent with Nesterov's momentum;
-- see e.g. Sutskever et al., ICML 2013
-- Author: Sixin Zhang ([email protected])
require 'optim'
--- One step of gradient descent with a Nesterov-style look-ahead momentum.
--
-- Parameters:
--   opfunc : function called as opfunc(w); must return fx, dfdx
--   w      : parameter object, updated through its own add/clone methods
--            (presumably a torch Tensor -- confirm against callers)
--   config : optional table of hyper-parameters (all default as shown)
--     config.lr       base learning rate (0)
--     config.lrd      learning-rate decay factor (0)
--     config.lrp      learning-rate decay power (0); the schedule
--                     lr / (1 + pversion*lrd)^lrp is used only when both
--                     lrd and lrp are > 0
--     config.mom      momentum coefficient (0); momentum is off unless > 0
--     config.mommax   upper bound for the scheduled momentum (1)
--     config.momdecay when > 0 (and mom > 0) the momentum becomes
--                     min(mommax, 1 - 0.5/(1 + pversion/momdecay)) (0)
--     config.l2wd     L2 weight-decay coefficient (0)
--   state  : optional table holding persistent data; defaults to config
--     state.pversion  number of updates performed so far
--     state.vt        velocity buffer, created on first use
--
-- Returns:
--   w and a one-element table {fx}, where fx is the value returned by
--   opfunc at the point where the gradient was evaluated.
function optim.msgd(opfunc, w, config, state)
   config = config or {}
   state = state or config

   -- Hyper-parameters.
   local base_lr      = config.lr or 0
   local lr_decay     = config.lrd or 0
   local lr_power     = config.lrp or 0
   local momentum     = config.mom or 0
   local momentum_cap = config.mommax or 1
   local mom_decay    = config.momdecay or 0
   local weight_decay = config.l2wd or 0

   state.pversion = state.pversion or 0

   -- Look-ahead: scale the velocity and move w along it before the
   -- gradient is evaluated.
   if momentum > 0 then
      if mom_decay > 0 then
         local scheduled = 1 - 0.5 / (1 + state.pversion / mom_decay)
         momentum = math.min(momentum_cap, scheduled)
      end
      state.vt = state.vt or w:clone():zero()
      state.vt:mul(momentum)
      w:add(state.vt)
   end

   -- Objective value and gradient at the (possibly shifted) point.
   local fx, dfdx = opfunc(w)

   -- L2 weight decay: l2wd * w is added onto the gradient through dfdx:add,
   -- i.e. on the object that opfunc returned.
   if weight_decay ~= 0 then
      dfdx:add(weight_decay, w)
   end

   -- Effective learning rate for this update.
   local step = base_lr
   if lr_decay > 0 then
      if lr_power > 0 then
         step = base_lr / math.pow(1 + state.pversion * lr_decay, lr_power)
      end
   end

   -- Gradient step on the parameters.
   w:add(-step, dfdx)

   -- The same step is folded into the velocity. The test is repeated on
   -- purpose: the schedule above may have changed the momentum value.
   if momentum > 0 then
      state.vt:add(-step, dfdx)
   end

   state.pversion = state.pversion + 1
   return w, {fx}
end