# -*- coding: utf-8 -*-
"""
Line search routines enforcing the strong Wolfe conditions, adapted from
scipy.optimize's line search module for use in SurvivalNet.

Created on Fri Apr 29 06:53:56 2016

@author: Safoora
"""
from warnings import warn

import numpy
# minpack2 and _zoom are private SciPy components; these import paths match
# the older SciPy releases this module was written against and have since
# been reorganized in newer SciPy.
from scipy.optimize import minpack2
from scipy.optimize.linesearch import _zoom


class LineSearchWarning(RuntimeWarning):
    """Issued when a line search does not converge."""
    pass


class _LineSearchError(RuntimeError):
    """Internal: raised when a line search cannot make progress."""
    pass

def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
c1=1e-4, c2=0.9,
amax=50, amin=1e-8, xtol=1e-14):
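    """
    Scalar search for a step satisfying the strong Wolfe conditions.

    Thin wrapper around MINPACK's ``dcsrch``: ``phi`` and ``derphi`` are the
    objective and its derivative along the search direction, ``phi0``,
    ``old_phi0`` and ``derphi0`` seed the initial trial step, ``c1``/``c2``
    are the Armijo and curvature parameters, and ``amin``/``amax`` bound the
    step. Returns ``(stp, phi1, phi0)``; ``stp`` is None if the search
    failed or did not converge within ``maxiter`` iterations.
    """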
if phi0 is None:
phi0 = phi(0.)
if derphi0 is None:
derphi0 = derphi(0.)
if old_phi0 is not None and derphi0 != 0:
alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
if alpha1 < 0:
alpha1 = 1.0
else:
alpha1 = .01
phi1 = phi0
derphi1 = derphi0
isave = numpy.zeros((2,), numpy.intc)
dsave = numpy.zeros((13,), float)
task = b'START'
maxiter = 15
    for i in range(maxiter):
        stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
                                                   c1, c2, xtol, task,
                                                   amin, amax, isave, dsave)
if task[:2] == b'FG':
alpha1 = stp
phi1 = phi(stp)
derphi1 = derphi(stp)
else:
break
else:
# maxiter reached, the line search did not converge
stp = None
if task[:5] == b'ERROR' or task[:4] == b'WARN':
stp = None # failed
return stp, phi1, phi0


def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
old_fval=None, old_old_fval=None,
args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
xtol=1e-14):
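    """
    As `scalar_search_wolfe1` but for a line search along direction ``pk``:
    minimizes ``f(xk + stp*pk)`` starting from ``xk``.

    ``fprime`` may be a callable gradient, or a ``(fprime, eps)`` tuple for
    a finite-difference approximation, in which case extra function
    evaluations are counted instead of gradient evaluations. Returns
    ``(stp, fc, gc, fval, old_fval, gval)``: the step (None on failure),
    the evaluation counts, the new and previous objective values, and the
    gradient at the new point.
    """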
if gfk is None:
gfk = fprime(xk)
if isinstance(fprime, tuple):
eps = fprime[1]
fprime = fprime[0]
newargs = (f, eps) + args
gradient = False
else:
newargs = args
gradient = True
    gval = [gfk]  # most recent gradient, updated by derphi for reuse
    gc = [0]      # gradient evaluation count
    fc = [0]      # function evaluation count
def phi(s):
fc[0] += 1
return f(xk + s*pk, *args)
def derphi(s):
gval[0] = fprime(xk + s*pk, *newargs)
if gradient:
gc[0] += 1
else:
fc[0] += len(xk) + 1
return numpy.dot(gval[0], pk)
derphi0 = numpy.dot(gfk, pk)
stp, fval, old_fval = scalar_search_wolfe1(
phi, derphi, old_fval, old_old_fval, derphi0,
c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
return stp, fc[0], gc[0], fval, old_fval, gval[0]


def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=50):
"""Find alpha that satisfies strong Wolfe conditions.
Parameters
----------
f : callable f(x,*args)
Objective function.
myfprime : callable f'(x,*args)
Objective function gradient.
xk : ndarray
Starting point.
pk : ndarray
Search direction.
gfk : ndarray, optional
Gradient value for x=xk (xk being the current parameter
estimate). Will be recomputed if omitted.
old_fval : float, optional
Function value for x=xk. Will be recomputed if omitted.
old_old_fval : float, optional
Function value for the point preceding x=xk
args : tuple, optional
Additional arguments passed to objective function.
c1 : float, optional
Parameter for Armijo condition rule.
c2 : float, optional
Parameter for curvature condition rule.
amax : float, optional
Maximum step size
Returns
-------
alpha : float or None
Alpha for which ``x_new = x0 + alpha * pk``,
or None if the line search algorithm did not converge.
fc : int
Number of function evaluations made.
gc : int
Number of gradient evaluations made.
new_fval : float or None
New function value ``f(x_new)=f(x0+alpha*pk)``,
or None if the line search algorithm did not converge.
old_fval : float
Old function value ``f(x0)``.
new_slope : float or None
The local slope along the search direction at the
new value ``<myfprime(x_new), pk>``,
or None if the line search algorithm did not converge.
Notes
-----
Uses the line search algorithm to enforce strong Wolfe
conditions. See Wright and Nocedal, 'Numerical Optimization',
1999, pg. 59-60.
For the zoom phase it uses an algorithm by [...].
"""
    fc = [0]       # function evaluation count (lists allow closure mutation)
    gc = [0]       # gradient evaluation count
    gval = [None]  # most recent gradient, stored by derphi for reuse
def phi(alpha):
fc[0] += 1
return f(xk + alpha * pk, *args)
if isinstance(myfprime, tuple):
def derphi(alpha):
fc[0] += len(xk) + 1
eps = myfprime[1]
fprime = myfprime[0]
newargs = (f, eps) + args
gval[0] = fprime(xk + alpha * pk, *newargs) # store for later use
return numpy.dot(gval[0], pk)
else:
fprime = myfprime
def derphi(alpha):
gc[0] += 1
gval[0] = fprime(xk + alpha * pk, *args) # store for later use
return numpy.dot(gval[0], pk)
    if gfk is None:
        # note: pass gfk explicitly when myfprime is a (fprime, eps) tuple,
        # since no plain gradient callable is bound in that branch
        gfk = fprime(xk, *args)
derphi0 = numpy.dot(gfk, pk)
alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax)
if derphi_star is None:
warn('The line search algorithm did not converge', LineSearchWarning)
    else:
        # derphi_star at this point is only a scalar slope; replace it with
        # the most recently evaluated gradient, which is the gradient at the
        # new point -- the caller then need not recompute it.
        derphi_star = gval[0]
return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star


def scalar_search_wolfe2(phi, derphi=None, phi0=None,
old_phi0=None, derphi0=None,
c1=1e-4, c2=0.9, amax=50):
"""Find alpha that satisfies strong Wolfe conditions.
alpha > 0 is assumed to be a descent direction.
Parameters
----------
phi : callable f(x)
Objective scalar function.
derphi : callable f'(x), optional
Objective function derivative (can be None)
phi0 : float, optional
Value of phi at s=0
old_phi0 : float, optional
Value of phi at previous point
derphi0 : float, optional
Value of derphi at s=0
c1 : float, optional
Parameter for Armijo condition rule.
c2 : float, optional
Parameter for curvature condition rule.
amax : float, optional
Maximum step size
Returns
-------
alpha_star : float or None
Best alpha, or None if the line search algorithm did not converge.
phi_star : float
phi at alpha_star
phi0 : float
phi at 0
derphi_star : float or None
derphi at alpha_star, or None if the line search algorithm
did not converge.
Notes
-----
Uses the line search algorithm to enforce strong Wolfe
conditions. See Wright and Nocedal, 'Numerical Optimization',
1999, pg. 59-60.
For the zoom phase it uses an algorithm by [...].
"""
if phi0 is None:
phi0 = phi(0.)
if derphi0 is None and derphi is not None:
derphi0 = derphi(0.)
alpha0 = 0
if old_phi0 is not None and derphi0 != 0:
alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
else:
alpha1 = .01
if alpha1 < 0:
alpha1 = .01
    if alpha1 == 0:
        # This shouldn't happen. Perhaps the increment has slipped below
        # machine precision? For now, set the return variables; the loop
        # below then exits immediately on its alpha1 == 0 test.
        alpha_star = None
        phi_star = phi0
        phi0 = old_phi0
        derphi_star = None
    phi_a1 = phi(alpha1)
    # derphi_a1 = derphi(alpha1) is evaluated inside the loop below
    phi_a0 = phi0
    derphi_a0 = derphi0
maxiter = 10
    for i in range(maxiter):
        if alpha1 == 0:
            break
        # sufficient decrease failed, or the objective stopped decreasing
        # (the second test is skipped on the first trial step, i == 0)
        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 0)):
alpha_star, phi_star, derphi_star = \
_zoom(alpha0, alpha1, phi_a0,
phi_a1, derphi_a0, phi, derphi,
phi0, derphi0, c1, c2)
break
derphi_a1 = derphi(alpha1)
if (abs(derphi_a1) <= -c2*derphi0):
alpha_star = alpha1
phi_star = phi_a1
derphi_star = derphi_a1
break
if (derphi_a1 >= 0):
alpha_star, phi_star, derphi_star = \
_zoom(alpha1, alpha0, phi_a1,
phi_a0, derphi_a1, phi, derphi,
phi0, derphi0, c1, c2)
break
        alpha2 = min(2 * alpha1, amax)  # double the step, respecting amax
alpha0 = alpha1
alpha1 = alpha2
phi_a0 = phi_a1
phi_a1 = phi(alpha1)
derphi_a0 = derphi_a1
else:
        # maxiter reached: the line search did not converge
alpha_star = alpha1
phi_star = phi_a1
derphi_star = None
warn('The line search algorithm did not converge', LineSearchWarning)
return alpha_star, phi_star, phi0, derphi_star
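

if __name__ == '__main__':
    # Minimal usage sketch on a toy quadratic. The objective, gradient, and
    # starting point below are illustrative assumptions (not part of
    # SurvivalNet); they only exercise the two search interfaces above.
    def f(x):
        return numpy.dot(x, x)

    def fprime(x):
        return 2 * x

    xk = numpy.array([1.0, -2.0])
    pk = -fprime(xk)  # steepest-descent direction

    stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(f, fprime, xk, pk)
    print('wolfe1: step = %s, f(xk + step*pk) = %s' % (stp, fval))

    alpha, fc, gc, fval, old_fval, gnew = line_search_wolfe2(f, fprime, xk, pk)
    print('wolfe2: step = %s, f(xk + step*pk) = %s' % (alpha, fval))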