-
-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Line Search for Newton-Raphson #194
Changes from 19 commits
866fa81
1983842
c54601f
879e118
58774f1
4b04e2a
72eb6b8
95c89a4
929552d
f6920f7
4982846
b5d9f3e
e1a2da9
2593f9a
b604e5b
1591dff
c74c78d
ea1b466
ad4ea57
18c19ad
0871967
8752556
ff524b0
7673c85
f8b6685
137d8fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ for large-scale and numerically-difficult nonlinear systems. | |
- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to | ||
`Val{:forward}` for forward finite differences. For more details on the choices, see the | ||
[FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation. | ||
- `linesearch`: the line search algorithm used. Defaults to no line search being used. | ||
- `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the | ||
linear solves within the Newton method. Defaults to `nothing`, which means it uses the | ||
LinearSolve.jl default algorithm choice. For more information on available algorithm | ||
|
@@ -48,29 +49,33 @@ for large-scale and numerically-difficult nonlinear systems. | |
Currently, the linear solver and chunk size choice only applies to in-place defined | ||
`NonlinearProblem`s. That is expected to change in the future. | ||
""" | ||
struct NewtonRaphson{CS, AD, FDT, L, P, ST, CJ} <: | ||
struct NewtonRaphson{CS, AD, FDT, L, P, ST, CJ, LS} <: | ||
AbstractNewtonAlgorithm{CS, AD, FDT, ST, CJ} | ||
linsolve::L | ||
precs::P | ||
linesearch::LS | ||
end | ||
|
||
function NewtonRaphson(; chunk_size = Val{0}(), autodiff = Val{true}(), | ||
standardtag = Val{true}(), concrete_jac = nothing, | ||
diff_type = Val{:forward}, linsolve = nothing, precs = DEFAULT_PRECS) | ||
diff_type = Val{:forward}, linesearch = LineSearches.Static(), linsolve = nothing, precs = DEFAULT_PRECS) | ||
NewtonRaphson{_unwrap_val(chunk_size), _unwrap_val(autodiff), diff_type, | ||
typeof(linsolve), typeof(precs), _unwrap_val(standardtag), | ||
_unwrap_val(concrete_jac)}(linsolve, | ||
precs) | ||
_unwrap_val(concrete_jac), typeof(linesearch)}(linsolve, | ||
precs, linesearch) | ||
end | ||
|
||
mutable struct NewtonRaphsonCache{iip, fType, algType, uType, duType, resType, pType, | ||
mutable struct NewtonRaphsonCache{iip, fType, algType, uType, duType, resType, pType, foType, goType, | ||
INType, tolType, | ||
probType, ufType, L, jType, JC} | ||
f::fType | ||
alg::algType | ||
u::uType | ||
fu::resType | ||
p::pType | ||
α::Real | ||
f_o::foType # stores value of objective | ||
yash2798 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
g_o::goType # stores grad of objective at x | ||
uf::ufType | ||
linsolve::L | ||
J::jType | ||
|
@@ -85,19 +90,19 @@ mutable struct NewtonRaphsonCache{iip, fType, algType, uType, duType, resType, p | |
stats::NLStats | ||
|
||
function NewtonRaphsonCache{iip}(f::fType, alg::algType, u::uType, fu::resType, | ||
p::pType, uf::ufType, linsolve::L, J::jType, | ||
p::pType, α::Real, f_o::foType, g_o::goType, uf::ufType, linsolve::L, J::jType, | ||
du1::duType, | ||
jac_config::JC, force_stop::Bool, maxiters::Int, | ||
internalnorm::INType, | ||
retcode::SciMLBase.ReturnCode.T, abstol::tolType, | ||
prob::probType, | ||
stats::NLStats) where { | ||
iip, fType, algType, uType, | ||
duType, resType, pType, INType, | ||
duType, resType, pType, foType, goType, INType, | ||
tolType, | ||
probType, ufType, L, jType, JC} | ||
new{iip, fType, algType, uType, duType, resType, pType, INType, tolType, | ||
probType, ufType, L, jType, JC}(f, alg, u, fu, p, | ||
new{iip, fType, algType, uType, duType, resType, pType, foType, goType, INType, tolType, | ||
probType, ufType, L, jType, JC}(f, alg, u, fu, p, α, f_o, g_o, | ||
uf, linsolve, J, du1, jac_config, | ||
force_stop, maxiters, internalnorm, | ||
retcode, abstol, prob, stats) | ||
|
@@ -126,7 +131,7 @@ function jacobian_caches(alg::NewtonRaphson, f, u, p, ::Val{true}) | |
end | ||
|
||
function jacobian_caches(alg::NewtonRaphson, f, u, p, ::Val{false}) | ||
JacobianWrapper(f, p), nothing, nothing, nothing, nothing | ||
JacobianWrapper(f, p), nothing, nothing, zero(u), nothing | ||
end | ||
|
||
function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::NewtonRaphson, | ||
|
@@ -143,6 +148,9 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::NewtonRaphson | |
end | ||
f = prob.f | ||
p = prob.p | ||
α = 1.0 #line search coefficient | ||
f_o = 0.0 | ||
g_o = zero(u) | ||
if iip | ||
fu = zero(u) | ||
f(fu, u, p) | ||
|
@@ -151,7 +159,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::NewtonRaphson | |
end | ||
uf, linsolve, J, du1, jac_config = jacobian_caches(alg, f, u, p, Val(iip)) | ||
|
||
return NewtonRaphsonCache{iip}(f, alg, u, fu, p, uf, linsolve, J, du1, jac_config, | ||
return NewtonRaphsonCache{iip}(f, alg, u, fu, p, α, f_o, g_o, uf, linsolve, J, du1, jac_config, | ||
false, maxiters, internalnorm, | ||
ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0)) | ||
end | ||
|
@@ -160,13 +168,13 @@ function perform_step!(cache::NewtonRaphsonCache{true}) | |
@unpack u, fu, f, p, alg = cache | ||
@unpack J, linsolve, du1 = cache | ||
jacobian!(J, cache) | ||
|
||
# u = u - J \ fu | ||
linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), linu = _vec(du1), | ||
p = p, reltol = cache.abstol) | ||
cache.linsolve = linres.cache | ||
@. u = u - du1 | ||
f(fu, u, p) | ||
perform_linesearch!(cache) | ||
@. cache.u = cache.u - cache.α * cache.du1 | ||
f(cache.fu, cache.u, p) | ||
|
||
if cache.internalnorm(cache.fu) < cache.abstol | ||
cache.force_stop = true | ||
|
@@ -181,7 +189,9 @@ end | |
function perform_step!(cache::NewtonRaphsonCache{false}) | ||
@unpack u, fu, f, p = cache | ||
J = jacobian(cache, f) | ||
cache.u = u - J \ fu | ||
cache.du1 = J \ fu | ||
yash2798 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
perform_linesearch!(cache) | ||
cache.u = cache.u - cache.α * cache.du1 | ||
cache.fu = f(cache.u, p) | ||
if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol | ||
cache.force_stop = true | ||
|
@@ -193,6 +203,43 @@ function perform_step!(cache::NewtonRaphsonCache{false}) | |
return nothing | ||
end | ||
|
||
function objective_linesearch(cache::NewtonRaphsonCache) ## returns the objective functions required in LS | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function |
||
@unpack f, p = cache | ||
|
||
function fo(x) | ||
return dot(value_f(cache, x), value_f(cache, x)) / 2 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the same function called twice? |
||
end | ||
|
||
function g!(g_o, x) | ||
mul!(g_o, simple_jacobian(cache, x)', value_f(cache, x)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not a good way to compute this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be a vjp with reversemode ad? |
||
g_o | ||
end | ||
|
||
function fg!(g_o, x) | ||
g!(g_o, x) | ||
fo(x) | ||
end | ||
return fo, g!, fg! | ||
end | ||
|
||
function perform_linesearch!(cache::NewtonRaphsonCache) | ||
@unpack u, fu, du1, alg, α, g_o = cache | ||
fo, g!, fg! = objective_linesearch(cache) | ||
ϕ(α) = fo(u .- α .* du1) | ||
|
||
function dϕ(α) | ||
g!(g_o, u .- α .* du1) | ||
return dot(g_o, du1) | ||
end | ||
|
||
function ϕdϕ(α) | ||
return (fg!(g_o, u .- α .* du1), dot(g_o, du1)) | ||
end | ||
cache.f_o = fo(u) | ||
@unpack f_o = cache | ||
cache.α, cache.f_o = alg.linesearch(ϕ, dϕ, ϕdϕ, 1.0, f_o, dot(du1, g_o)) | ||
end | ||
|
||
function SciMLBase.solve!(cache::NewtonRaphsonCache) | ||
while !cache.force_stop && cache.stats.nsteps < cache.maxiters | ||
perform_step!(cache) | ||
|
@@ -228,3 +275,18 @@ function SciMLBase.reinit!(cache::NewtonRaphsonCache{iip}, u0 = cache.u; p = cac | |
cache.retcode = ReturnCode.Default | ||
return cache | ||
end | ||
|
||
## some helper func ### | ||
|
||
function value_f(cache::NewtonRaphsonCache, x) | ||
iip = get_iip(cache.prob) | ||
@unpack f, p = cache | ||
if iip | ||
res = zero(x) | ||
f(res, x, p) | ||
else | ||
res = f(x, p) | ||
end | ||
res | ||
end | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
shouldn't the nonlinear solve be responsible for providing the jacobian? What if the problem doesn't support autodiff?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So should I just use
FiniteDiff
whenautodiff
is passed asfalse
? That should be a correct approach?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is already handled by other dispatches. Why does this exist?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, however in the current dispatch, the jacobian is calculated at$\alpha_{k}$ ), so we can't just do
cache.u
, i.e. the dispatch takes incache
as an argument and not thex
. But in when linesearch is performed, we need the jacobian at multiplex
's (for multiple step lengthsjacobian(f, cache)
as in the current dispatch.for example like here (https://github.com/JuliaNLSolvers/LineSearches.jl/blob/ded667a80f47886c77d67e8890f6adb127679ab4/src/strongwolfe.jl#L84)
i can't think of any other way to approach this, any inputs will be helpful?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
although i have an idea like this roughly
function simple_jacobian(cache, x)
cache.tmp_var = cache.u
cache.u = x
J = jacobian(cache, f) ## our current dispatch
cache.u = cache.tmp_var
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this way we don't have to deal with sparsity and AD/Finite diff separately
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it should be something like that reusing the existing machinery. But the vjp shouldn't build the Jacobian at all: you don't actually need to calculate the Jacobian for most of this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes you are absolutely right. so we need$J^{T}v = (v^{T}J)^{T}$ . We can calculate vjp $v^{T}J$ . are there any routines in
SparseDiffTools.jl
or do we need to do this kind of 'in-situ'There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if in-situ, can you point me to something that i can take help from regarding reverse-mode AD, i am not very well versed with it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's precisely what Reverse-Mode AD calculates without calculating the Jacobian. This is why I want @avik-pal to take a look and see how that would mesh with #206