diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 75f12a2e9..e0d7a57e5 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -3,9 +3,13 @@ on: pull_request: branches: - master + paths-ignore: + - 'docs/**' push: branches: - master + paths-ignore: + - 'docs/**' jobs: test: runs-on: ubuntu-latest diff --git a/Project.toml b/Project.toml index 044751c0c..c7decafdb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NonlinearSolve" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" authors = ["SciML"] -version = "2.0.0" +version = "2.0.1" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" diff --git a/docs/Project.toml b/docs/Project.toml index df765bb1d..42e261955 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -12,7 +12,7 @@ Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4" [compat] BenchmarkTools = "1" -Documenter = "0.27" +Documenter = "1" LinearSolve = "2" NonlinearSolve = "1, 2" NonlinearSolveMINPACK = "0.1" diff --git a/docs/make.jl b/docs/make.jl index d2815c502..8f895b0b4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,15 +11,9 @@ makedocs(sitename = "NonlinearSolve.jl", modules = [NonlinearSolve, NonlinearSolve.SciMLBase, NonlinearSolve.DiffEqBase, SimpleNonlinearSolve, Sundials, SciMLNLSolve, NonlinearSolveMINPACK, SteadyStateDiffEq], - clean = true, doctest = false, - strict = [ - :doctest, - :linkcheck, - :parse_error, - :example_block, - # Other available options are - # :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, :footnote, :meta_block, :missing_docs, :setup_block - ], + clean = true, doctest = false, linkcheck = true, + linkcheck_ignore = ["https://twitter.com/ChrisRackauckas/status/1544743542094020615"], + warnonly = [:missing_docs, :cross_references], format = Documenter.HTML(assets = ["assets/favicon.ico"], canonical = "https://docs.sciml.ai/NonlinearSolve/stable/"), pages = pages) diff --git a/docs/src/basics/TerminationCondition.md b/docs/src/basics/TerminationCondition.md index cd780b997..e966bb22c 100644 --- a/docs/src/basics/TerminationCondition.md +++ b/docs/src/basics/TerminationCondition.md @@ -1,6 +1,6 @@ # Termination Conditions -Provides a API to specify termination conditions for [`NonLinearProblem`](@ref) and +Provides a API to specify termination conditions for [`NonlinearProblem`](@ref) and [`SteadyStateProblem`](@ref). For details on the various termination modes, i.e., NLSolveTerminationMode, see the documentation for [`NLSolveTerminationCondition`](@ref). diff --git a/docs/src/index.md b/docs/src/index.md index c47fb57de..3d7e97890 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -82,32 +82,19 @@ Pkg.status(; mode = PKGMODE_MANIFEST) # hide ``` -```@raw html -You can also download the -manifest file and the -project file. +link_manifest = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version * + "/assets/Manifest.toml" +link_project = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version * + "/assets/Project.toml" +Markdown.parse("""You can also download the +[manifest]($link_manifest) +file and the +[project]($link_project) +file. +""") ``` diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 615f96c03..20a0eaf2f 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -39,36 +39,39 @@ include("linesearch.jl") include("raphson.jl") include("trustRegion.jl") include("levenberg.jl") +include("pseudotransient.jl") include("jacobian.jl") include("ad.jl") import PrecompileTools -PrecompileTools.@compile_workload begin - for T in (Float32, Float64) - prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) +@static if VERSION >= v"1.10" + PrecompileTools.@compile_workload begin + for T in (Float32, Float64) + prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)) precompile_algs = if VERSION ≥ v"1.7" - (NewtonRaphson(), TrustRegion(), LevenbergMarquardt()) + (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient()) else (NewtonRaphson(),) end - for alg in precompile_algs - solve(prob, alg, abstol = T(1e-2)) - end + for alg in precompile_algs + solve(prob, alg, abstol = T(1e-2)) + end - prob = NonlinearProblem{true}((du, u, p) -> du[1] = u[1] * u[1] - p[1], T[0.1], - T[2]) - for alg in precompile_algs - solve(prob, alg, abstol = T(1e-2)) + prob = NonlinearProblem{true}((du, u, p) -> du[1] = u[1] * u[1] - p[1], T[0.1], + T[2]) + for alg in precompile_algs + solve(prob, alg, abstol = T(1e-2)) + end end end end export RadiusUpdateSchemes -export NewtonRaphson, TrustRegion, LevenbergMarquardt +export NewtonRaphson, TrustRegion, LevenbergMarquardt, PseudoTransient export LineSearch diff --git a/src/jacobian.jl b/src/jacobian.jl index 01c030462..1ff29d803 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -85,7 +85,13 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u, p, ::Val{ii end du = _mutable_zero(u) - linprob = LinearProblem(J, _vec(fu); u0 = _vec(du)) + if alg isa PseudoTransient + alpha = convert(eltype(u), alg.alpha_initial) + J_new = J - (1 / alpha) * I + linprob = LinearProblem(J_new, _vec(fu); u0 = _vec(du)) + else + linprob = LinearProblem(J, _vec(fu); u0 = _vec(du)) + end weight = similar(u) recursivefill!(weight, true) diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl new file mode 100644 index 000000000..c3308dfb6 --- /dev/null +++ b/src/pseudotransient.jl @@ -0,0 +1,155 @@ +@concrete struct PseudoTransient{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} + ad::AD + linsolve + precs + alpha_initial +end + +concrete_jac(::PseudoTransient{CJ}) where {CJ} = CJ + +function PseudoTransient(; concrete_jac = nothing, linsolve = nothing, + precs = DEFAULT_PRECS, alpha_initial = 1e-3, adkwargs...) + ad = default_adargs_to_adtype(; adkwargs...) + return PseudoTransient{_unwrap_val(concrete_jac)}(ad, linsolve, precs, alpha_initial) +end + +@concrete mutable struct PseudoTransientCache{iip} + f + alg + u + fu1 + fu2 + du + p + alpha + res_norm + uf + linsolve + J + jac_cache + force_stop + maxiters::Int + internalnorm + retcode::ReturnCode.T + abstol + prob + stats::NLStats +end + +isinplace(::PseudoTransientCache{iip}) where {iip} = iip + +function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransient, args...; + alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM, + linsolve_kwargs = (;), + kwargs...) where {uType, iip} + @unpack f, u0, p = prob + u = alias_u0 ? u0 : deepcopy(u0) + if iip + fu1 = f.resid_prototype === nothing ? zero(u) : f.resid_prototype + f(fu1, u, p) + else + fu1 = _mutable(f(u, p)) + end + uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, + f, + u, + p, + Val(iip); + linsolve_kwargs) + alpha = convert(eltype(u), alg.alpha_initial) + res_norm = internalnorm(fu1) + + return PseudoTransientCache{iip}(f, alg, u, fu1, fu2, du, p, alpha, res_norm, uf, + linsolve, J, + jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, + NLStats(1, 0, 0, 0, 0)) +end + +function perform_step!(cache::PseudoTransientCache{true}) + @unpack u, fu1, f, p, alg, J, linsolve, du, alpha = cache + jacobian!!(J, cache) + J_new = J - (1 / alpha) * I + + # u = u - J \ fu + linres = dolinsolve(alg.precs, linsolve; A = J_new, b = _vec(fu1), linu = _vec(du), + p, reltol = cache.abstol) + cache.linsolve = linres.cache + @. u = u - du + f(fu1, u, p) + + new_norm = cache.internalnorm(fu1) + cache.alpha *= cache.res_norm / new_norm + cache.res_norm = new_norm + + new_norm < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + cache.stats.njacs += 1 + cache.stats.nsolve += 1 + cache.stats.nfactors += 1 + return nothing +end + +function perform_step!(cache::PseudoTransientCache{false}) + @unpack u, fu1, f, p, alg, linsolve, alpha = cache + + cache.J = jacobian!!(cache.J, cache) + # u = u - J \ fu + if linsolve === nothing + cache.du = fu1 / (cache.J - (1 / alpha) * I) + else + linres = dolinsolve(alg.precs, linsolve; A = cache.J - (1 / alpha) * I, + b = _vec(fu1), + linu = _vec(cache.du), p, reltol = cache.abstol) + cache.linsolve = linres.cache + end + cache.u = @. u - cache.du # `u` might not support mutation + cache.fu1 = f(cache.u, p) + + new_norm = cache.internalnorm(fu1) + cache.alpha *= cache.res_norm / new_norm + cache.res_norm = new_norm + new_norm < cache.abstol && (cache.force_stop = true) + cache.stats.nf += 1 + cache.stats.njacs += 1 + cache.stats.nsolve += 1 + cache.stats.nfactors += 1 + return nothing +end + +function SciMLBase.solve!(cache::PseudoTransientCache) + while !cache.force_stop && cache.stats.nsteps < cache.maxiters + perform_step!(cache) + cache.stats.nsteps += 1 + end + + if cache.stats.nsteps == cache.maxiters + cache.retcode = ReturnCode.MaxIters + else + cache.retcode = ReturnCode.Success + end + + return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu1; + cache.retcode, cache.stats) +end + +function SciMLBase.reinit!(cache::PseudoTransientCache{iip}, u0 = cache.u; p = cache.p, + abstol = cache.abstol, maxiters = cache.maxiters) where {iip} + cache.p = p + if iip + recursivecopy!(cache.u, u0) + cache.f(cache.fu1, cache.u, p) + else + # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter + cache.u = u0 + cache.fu1 = cache.f(cache.u, p) + end + cache.alpha = convert(eltype(u), 1e-3) + cache.res_norm = internalnorm(cache.fu1) + cache.abstol = abstol + cache.maxiters = maxiters + cache.stats.nf = 1 + cache.stats.nsteps = 1 + cache.force_stop = false + cache.retcode = ReturnCode.Default + return cache +end diff --git a/src/trustRegion.jl b/src/trustRegion.jl index 20577ef67..f2ccb7f63 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -25,6 +25,20 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows: """ Simple + """ + `RadiusUpdateSchemes.NLsolve` + + The same updating scheme as in NLsolve's (https://github.com/JuliaNLSolvers/NLsolve.jl) trust region dogleg implementation. + """ + NLsolve + + """ + `RadiusUpdateSchemes.NocedalWright` + + Trust region updating scheme as in Nocedal and Wright [see Alg 11.5, page 291]. + """ + NocedalWright + """ `RadiusUpdateSchemes.Hei` @@ -146,7 +160,7 @@ end function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, #defaults to conventional radius update max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1, - step_threshold::Real = 1 // 10, shrink_threshold::Real = 1 // 4, + step_threshold::Real = 1 // 10000, shrink_threshold::Real = 1 // 4, expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4, expand_factor::Real = 2 // 1, max_shrink_times::Int = 32, adkwargs...) ad = default_adargs_to_adtype(; adkwargs...) @@ -187,8 +201,10 @@ end H g shrink_counter::Int - step_size + du u_tmp + u_gauss_newton + u_cauchy fu_new make_new_J::Bool r::floatType @@ -212,55 +228,68 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion, loss = get_loss(fu1) uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs) - - radius_update_scheme = alg.radius_update_scheme - max_trust_radius = convert(eltype(u), alg.max_trust_radius) - initial_trust_radius = convert(eltype(u), alg.initial_trust_radius) - step_threshold = convert(eltype(u), alg.step_threshold) - shrink_threshold = convert(eltype(u), alg.shrink_threshold) - expand_threshold = convert(eltype(u), alg.expand_threshold) - shrink_factor = convert(eltype(u), alg.shrink_factor) - expand_factor = convert(eltype(u), alg.expand_factor) - # Set default trust region radius if not specified - if iszero(max_trust_radius) - max_trust_radius = convert(eltype(u), max(norm(fu1), maximum(u) - minimum(u))) - end - if iszero(initial_trust_radius) - initial_trust_radius = convert(eltype(u), max_trust_radius / 11) - end + u_tmp = zero(u) + u_cauchy = zero(u) + u_gauss_newton = zero(u) loss_new = loss H = zero(J) g = _mutable_zero(fu1) shrink_counter = 0 - step_size = zero(u) fu_new = zero(fu1) make_new_J = true r = loss + floatType = typeof(r) + + # set trust region update scheme + radius_update_scheme = alg.radius_update_scheme + + # set default type for all trust region parameters + trustType = floatType + if radius_update_scheme == RadiusUpdateSchemes.NLsolve + max_trust_radius = convert(trustType, Inf) + initial_trust_radius = norm(u0) > 0 ? convert(trustType, norm(u0)) : one(trustType) + else + max_trust_radius = convert(trustType, alg.max_trust_radius) + if iszero(max_trust_radius) + max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u))) + end + initial_trust_radius = convert(trustType, alg.initial_trust_radius) + if iszero(initial_trust_radius) + initial_trust_radius = convert(trustType, max_trust_radius / 11) + end + end + step_threshold = convert(trustType, alg.step_threshold) + shrink_threshold = convert(trustType, alg.shrink_threshold) + expand_threshold = convert(trustType, alg.expand_threshold) + shrink_factor = convert(trustType, alg.shrink_factor) + expand_factor = convert(trustType, alg.expand_factor) + # Parameters for the Schemes - p1 = convert(eltype(u), 0.0) - p2 = convert(eltype(u), 0.0) - p3 = convert(eltype(u), 0.0) - p4 = convert(eltype(u), 0.0) - ϵ = convert(eltype(u), 1.0e-8) - if radius_update_scheme === RadiusUpdateSchemes.Hei - step_threshold = convert(eltype(u), 0.0) - shrink_threshold = convert(eltype(u), 0.25) - expand_threshold = convert(eltype(u), 0.25) - p1 = convert(eltype(u), 5.0) # M - p2 = convert(eltype(u), 0.1) # β - p3 = convert(eltype(u), 0.15) # γ1 - p4 = convert(eltype(u), 0.15) # γ2 - initial_trust_radius = convert(eltype(u), 1.0) + p1 = convert(floatType, 0.0) + p2 = convert(floatType, 0.0) + p3 = convert(floatType, 0.0) + p4 = convert(floatType, 0.0) + ϵ = convert(floatType, 1.0e-8) + if radius_update_scheme === RadiusUpdateSchemes.NLsolve + p1 = convert(floatType, 0.5) + elseif radius_update_scheme === RadiusUpdateSchemes.Hei + step_threshold = convert(trustType, 0.0) + shrink_threshold = convert(trustType, 0.25) + expand_threshold = convert(trustType, 0.25) + p1 = convert(floatType, 5.0) # M + p2 = convert(floatType, 0.1) # β + p3 = convert(floatType, 0.15) # γ1 + p4 = convert(floatType, 0.15) # γ2 + initial_trust_radius = convert(trustType, 1.0) elseif radius_update_scheme === RadiusUpdateSchemes.Yuan - step_threshold = convert(eltype(u), 0.0001) - shrink_threshold = convert(eltype(u), 0.25) - expand_threshold = convert(eltype(u), 0.25) - p1 = convert(eltype(u), 2.0) # μ - p2 = convert(eltype(u), 1 / 6) # c5 - p3 = convert(eltype(u), 6.0) # c6 - p4 = convert(eltype(u), 0.0) + step_threshold = convert(trustType, 0.0001) + shrink_threshold = convert(trustType, 0.25) + expand_threshold = convert(trustType, 0.25) + p1 = convert(floatType, 2.0) # μ + p2 = convert(floatType, 1 / 6) # c5 + p3 = convert(floatType, 6.0) # c6 if iip auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu1) else @@ -270,54 +299,58 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion, g = auto_jacvec(x -> f(x, p), u, fu1) end end - initial_trust_radius = convert(eltype(u), p1 * norm(g)) + initial_trust_radius = convert(trustType, p1 * norm(g)) elseif radius_update_scheme === RadiusUpdateSchemes.Fan - step_threshold = convert(eltype(u), 0.0001) - shrink_threshold = convert(eltype(u), 0.25) - expand_threshold = convert(eltype(u), 0.75) - p1 = convert(eltype(u), 0.1) # μ - p2 = convert(eltype(u), 1 / 4) # c5 - p3 = convert(eltype(u), 12) # c6 - p4 = convert(eltype(u), 1.0e18) # M - initial_trust_radius = convert(eltype(u), p1 * (norm(fu1)^0.99)) + step_threshold = convert(trustType, 0.0001) + shrink_threshold = convert(trustType, 0.25) + expand_threshold = convert(trustType, 0.75) + p1 = convert(floatType, 0.1) # μ + p2 = convert(floatType, 0.25) # c5 + p3 = convert(floatType, 12.0) # c6 + p4 = convert(floatType, 1.0e18) # M + initial_trust_radius = convert(trustType, p1 * (norm(fu1)^0.99)) elseif radius_update_scheme === RadiusUpdateSchemes.Bastin - step_threshold = convert(eltype(u), 0.05) - shrink_threshold = convert(eltype(u), 0.05) - expand_threshold = convert(eltype(u), 0.9) - p1 = convert(eltype(u), 2.5) #alpha_1 - p2 = convert(eltype(u), 0.25) # alpha_2 - p3 = convert(eltype(u), 0) # not required - p4 = convert(eltype(u), 0) # not required - initial_trust_radius = convert(eltype(u), 1.0) + step_threshold = convert(trustType, 0.05) + shrink_threshold = convert(trustType, 0.05) + expand_threshold = convert(trustType, 0.9) + p1 = convert(floatType, 2.5) # alpha_1 + p2 = convert(floatType, 0.25) # alpha_2 + initial_trust_radius = convert(trustType, 1.0) end return TrustRegionCache{iip}(f, alg, u_prev, u, fu_prev, fu1, fu2, p, uf, linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold, shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new, - H, g, shrink_counter, step_size, du, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ, + H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r, + p1, p2, p3, p4, ϵ, NLStats(1, 0, 0, 0, 0)) end isinplace(::TrustRegionCache{iip}) where {iip} = iip function perform_step!(cache::TrustRegionCache{true}) - @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache + @unpack make_new_J, J, fu, f, u, p, u_gauss_newton, alg, linsolve = cache if cache.make_new_J jacobian!!(J, cache) - mul!(cache.H, J, J) - mul!(cache.g, J, fu) + mul!(cache.H, J', J) + mul!(cache.g, J', fu) cache.stats.njacs += 1 + + # do not use A = cache.H, b = _vec(cache.g) since it is equivalent + # to A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular + linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), + linu = _vec(u_gauss_newton), + p = p, reltol = cache.abstol) + cache.linsolve = linres.cache + @. cache.u_gauss_newton = -1 * u_gauss_newton end - linres = dolinsolve(alg.precs, linsolve; A = cache.H, b = _vec(cache.g), - linu = _vec(u_tmp), p, reltol = cache.abstol) - cache.linsolve = linres.cache - cache.u_tmp .= -1 .* u_tmp + # Compute dogleg step dogleg!(cache) # Compute the potentially new u - cache.u_tmp .= u .+ cache.step_size + @. cache.u_tmp = u + cache.du f(cache.fu_new, cache.u_tmp, p) trust_region_step!(cache) cache.stats.nf += 1 @@ -331,18 +364,18 @@ function perform_step!(cache::TrustRegionCache{false}) if make_new_J J = jacobian!!(cache.J, cache) - cache.H = J * J - cache.g = J * fu + cache.H = J' * J + cache.g = J' * fu cache.stats.njacs += 1 + cache.u_gauss_newton = -1 .* (cache.H \ cache.g) end - @unpack g, H = cache # Compute the Newton step. - cache.u_tmp = -H \ g dogleg!(cache) # Compute the potentially new u - cache.u_tmp = u .+ cache.step_size + cache.u_tmp = u + cache.du + cache.fu_new = f(cache.u_tmp, p) trust_region_step!(cache) cache.stats.nf += 1 @@ -355,25 +388,25 @@ function retrospective_step!(cache::TrustRegionCache) @unpack J, fu_prev, fu, u_prev, u = cache J = jacobian!!(deepcopy(J), cache) if J isa Number - cache.H = J * J - cache.g = J * fu + cache.H = J' * J + cache.g = J' * fu else - mul!(cache.H, J, J) - mul!(cache.g, J, fu) + mul!(cache.H, J', J) + mul!(cache.g, J', fu) end cache.stats.njacs += 1 - @unpack H, g, step_size = cache + @unpack H, g, du = cache return -(get_loss(fu_prev) - get_loss(fu)) / - (step_size' * g + step_size' * H * step_size / 2) + (dot(du, g) + dot(du, H, du) / 2) end function trust_region_step!(cache::TrustRegionCache) - @unpack fu_new, step_size, g, H, loss, max_trust_r, radius_update_scheme = cache + @unpack fu_new, du, g, H, loss, max_trust_r, radius_update_scheme = cache cache.loss_new = get_loss(fu_new) # Compute the ratio of the actual reduction to the predicted reduction. - cache.r = -(loss - cache.loss_new) / (step_size' * g + step_size' * H * step_size / 2) + cache.r = -(loss - cache.loss_new) / (dot(du, g) + dot(du, H, du) / 2) @unpack r = cache if radius_update_scheme === RadiusUpdateSchemes.Simple @@ -403,6 +436,51 @@ function trust_region_step!(cache::TrustRegionCache) cache.force_stop = true end + elseif radius_update_scheme === RadiusUpdateSchemes.NLsolve + # accept/reject decision + if r > cache.step_threshold # accept + take_step!(cache) + cache.loss = cache.loss_new + cache.make_new_J = true + else # reject + cache.make_new_J = false + end + + # trust region update + if r < 1 // 10 # cache.shrink_threshold + cache.trust_r *= 1 // 2 # cache.shrink_factor + elseif r >= 9 // 10 # cache.expand_threshold + cache.trust_r = 2 * norm(cache.du) # cache.expand_factor * norm(cache.du) + elseif r >= 1 // 2 # cache.p1 + cache.trust_r = max(cache.trust_r, 2 * norm(cache.du)) # cache.expand_factor * norm(cache.du)) + end + + # convergence test + if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol + cache.force_stop = true + end + + elseif radius_update_scheme === RadiusUpdateSchemes.NocedalWright + # accept/reject decision + if r > cache.step_threshold # accept + take_step!(cache) + cache.loss = cache.loss_new + cache.make_new_J = true + else # reject + cache.make_new_J = false + end + + if r < 1 // 4 + cache.trust_r = (1 // 4) * norm(cache.du) + elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r) / cache.trust_r < 1e-6 + cache.trust_r = min(2 * cache.trust_r, cache.max_trust_r) + end + + # convergence test + if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol + cache.force_stop = true + end + elseif radius_update_scheme === RadiusUpdateSchemes.Hei if r > cache.step_threshold take_step!(cache) @@ -413,14 +491,14 @@ function trust_region_step!(cache::TrustRegionCache) end # Hei's radius update scheme @unpack shrink_threshold, p1, p2, p3, p4 = cache - if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(step_size) < + if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(du) < cache.trust_r cache.shrink_counter += 1 else cache.shrink_counter = 0 end cache.trust_r = rfunc(r, shrink_threshold, p1, p3, p4, p2) * - cache.internalnorm(step_size) + cache.internalnorm(du) if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol || cache.internalnorm(g) < cache.ϵ @@ -432,7 +510,7 @@ function trust_region_step!(cache::TrustRegionCache) cache.p1 = cache.p2 * cache.p1 cache.shrink_counter += 1 elseif r >= cache.expand_threshold && - cache.internalnorm(step_size) > cache.trust_r / 2 + cache.internalnorm(du) > cache.trust_r / 2 cache.p1 = cache.p3 * cache.p1 cache.shrink_counter = 0 end @@ -481,7 +559,7 @@ function trust_region_step!(cache::TrustRegionCache) cache.loss = cache.loss_new cache.make_new_J = true if retrospective_step!(cache) >= cache.expand_threshold - cache.trust_r = max(cache.p1 * cache.internalnorm(step_size), cache.trust_r) + cache.trust_r = max(cache.p1 * cache.internalnorm(du), cache.trust_r) end else @@ -495,31 +573,63 @@ function trust_region_step!(cache::TrustRegionCache) end end -function dogleg!(cache::TrustRegionCache) - @unpack u_tmp, trust_r = cache +function dogleg!(cache::TrustRegionCache{true}) + @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache + + # Take the full Gauss-Newton step if lies within the trust region. + if norm(u_gauss_newton) ≤ trust_r + cache.du .= u_gauss_newton + return + end + + # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region + l_grad = norm(cache.g) # length of the gradient + d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate + if d_cauchy >= trust_r + @. cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region + return + end + + # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region + @. u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point + @. u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation + + a = dot(u_tmp, u_tmp) + b = 2 * dot(u_cauchy, u_tmp) + c = d_cauchy^2 - trust_r^2 + aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues + τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary + + @. cache.du = u_cauchy + τ * u_tmp +end + +function dogleg!(cache::TrustRegionCache{false}) + @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache - # Test if the full step is within the trust region. - if norm(u_tmp) ≤ trust_r - cache.step_size = deepcopy(u_tmp) + # Take the full Gauss-Newton step if lies within the trust region. + if norm(u_gauss_newton) ≤ trust_r + cache.du = deepcopy(u_gauss_newton) return end - # Calcualte Cauchy point, optimum along the steepest descent direction. - δsd = -cache.g - norm_δsd = norm(δsd) - if norm_δsd ≥ trust_r - cache.step_size = δsd .* trust_r / norm_δsd + ## Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region + l_grad = norm(cache.g) + d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate + if d_cauchy > trust_r # cauchy point lies outside of trust region + cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region return end - # Find the intersection point on the boundary. - N_sd = u_tmp - δsd - dot_N_sd = dot(N_sd, N_sd) - dot_sd_N_sd = dot(δsd, N_sd) - dot_sd = dot(δsd, δsd) - fact = dot_sd_N_sd^2 - dot_N_sd * (dot_sd - trust_r^2) - τ = (-dot_sd_N_sd + sqrt(fact)) / dot_N_sd - cache.step_size = δsd + τ * N_sd + # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region + u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point + u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg + a = dot(u_tmp, u_tmp) + b = 2 * dot(u_cauchy, u_tmp) + c = d_cauchy^2 - trust_r^2 + aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues + τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary + + cache.du = u_cauchy + τ * u_tmp end function take_step!(cache::TrustRegionCache{true}) diff --git a/test/basictests.jl b/test/basictests.jl index 54e63e93d..e872ec3d1 100644 --- a/test/basictests.jl +++ b/test/basictests.jl @@ -141,8 +141,9 @@ end return solve(prob, TrustRegion(; radius_update_scheme); abstol = 1e-9, kwargs...) end - radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.Hei, - RadiusUpdateSchemes.Yuan, RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin] + radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.NocedalWright, + RadiusUpdateSchemes.NLsolve, RadiusUpdateSchemes.Hei, RadiusUpdateSchemes.Yuan, + RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin] u0s = VERSION ≥ v"1.9" ? ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) : ([1.0, 1.0], 1.0) @testset "[OOP] u0: $(typeof(u0)) radius_update_scheme: $(radius_update_scheme)" for u0 in u0s, @@ -284,7 +285,7 @@ end maxiters) sol_oop = benchmark_nlsolve_oop(quadratic_f, u0; radius_update_scheme, maxiters) - @test sol_iip.u ≈ sol_iip.u + @test sol_iip.u ≈ sol_oop.u end end end @@ -402,3 +403,60 @@ end end end end + +# --- PseudoTransient tests --- + +@testset "PseudoTransient" begin + + #iip + function test_f!(du, u, p) + du[1] = 2 - 2u[1] + du[2] = u[1] - 4u[2] + return du + end + + #oop + simple_test(u, p) = 0.9f0 .* u .- u + + #test jacobian free PseudoTransient method + function f!(res, u, p) + @. res = u * u - p + end + + @testset "[IIP] u0: $(typeof(u0))" for u0 in (zeros(2),) + probN = NonlinearProblem{true}(test_f!, u0) + sol = solve(probN, PseudoTransient()) + @test sol.retcode == ReturnCode.Success + du = zeros(2) + test_f!(du, sol.u, 4.0) + @test du≈[0, 0] atol=1e-7 + end + + @testset "[OOP] u0: $(typeof(u0))" for u0 in ([1.0f0],) + probN = NonlinearProblem{false}(simple_test, u0) + sol = solve(probN, PseudoTransient(alpha_initial = 1.0)) + @test sol.retcode == ReturnCode.Success + @test abs(sol.u[1]) <= 1.0f-4 + end + + @testset "Jacobian Free PT" begin + u0 = [1.0, 1.0] + p = 2.0 + probN = NonlinearProblem(f!, u0, p) + linsolve = LinearSolve.KrylovJL_GMRES() + sol = solve(probN, + PseudoTransient(alpha_initial = 10.0, linsolve = linsolve), + reltol = 1e-9) + @test sol.retcode == ReturnCode.Success + end + + @testset "NewtonRaphson Fails" begin + p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0] + probN = NonlinearProblem{false}(newton_fails, u0, p) + sol = solve(probN, PseudoTransient(alpha_initial = 1.0), abstol = 1e-10) + @test all(abs.(newton_fails(sol.u, p)) .< 1e-10) + end +end + +# Test that `PseudoTransient` passes a test that `NewtonRaphson` fails on.