From 5485267e42827131310562a0480d0c5a31a8e35a Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 24 Sep 2024 14:05:50 -0400 Subject: [PATCH] refactor: migrate to LineSearch.jl --- Project.toml | 2 + docs/src/devdocs/internal_interfaces.md | 7 - src/NonlinearSolve.jl | 98 ++-- src/abstract_types.jl | 22 +- src/algorithms/dfsane.jl | 7 +- src/algorithms/klement.jl | 12 +- src/algorithms/pseudo_transient.jl | 7 +- src/core/approximate_jacobian.jl | 12 +- src/core/generalized_first_order.jl | 24 +- src/core/spectral_methods.jl | 5 +- src/globalization/line_search.jl | 668 +++++++++--------------- 11 files changed, 324 insertions(+), 540 deletions(-) diff --git a/Project.toml b/Project.toml index ab35df1e2..d0c7e9da2 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" +LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" @@ -78,6 +79,7 @@ Hwloc = "3" InteractiveUtils = "<0.0.1, 1" LazyArrays = "1.8.2, 2" LeastSquaresOptim = "0.8.5" +LineSearch = "0.1" LineSearches = "7.2" LinearAlgebra = "1.10" LinearSolve = "2.30" diff --git a/docs/src/devdocs/internal_interfaces.md b/docs/src/devdocs/internal_interfaces.md index 91b9562a8..53e20d754 100644 --- a/docs/src/devdocs/internal_interfaces.md +++ b/docs/src/devdocs/internal_interfaces.md @@ -38,13 +38,6 @@ NonlinearSolve.AbstractDampingFunction NonlinearSolve.AbstractDampingFunctionCache ``` -## Line Search - -```@docs -NonlinearSolve.AbstractNonlinearSolveLineSearchAlgorithm -NonlinearSolve.AbstractNonlinearSolveLineSearchCache -``` - ## Trust Region ```@docs diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 2e420a761..90387814a 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -30,6 +30,8 @@ using LazyArrays: LazyArrays, ApplyArray, cache using LinearAlgebra: LinearAlgebra, ColumnNorm, Diagonal, I, LowerTriangular, Symmetric, UpperTriangular, axpy!, cond, diag, diagind, dot, issuccess, istril, istriu, lu, mul!, norm, pinv, tril!, triu! 
+using LineSearch: LineSearch, AbstractLineSearchAlgorithm, AbstractLineSearchCache, + NoLineSearch using LineSearches: LineSearches using LinearSolve: LinearSolve, LUFactorization, QRFactorization, ComposePreconditioner, InvPreconditioner, needs_concrete_A, AbstractFactorization, @@ -103,54 +105,54 @@ include("algorithms/extension_algs.jl") include("utils.jl") include("default.jl") -@setup_workload begin - nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), - (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) - probs_nls = NonlinearProblem[] - for (fn, u0) in nlfuncs - push!(probs_nls, NonlinearProblem(fn, u0, 2.0)) - end - - nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), - PseudoTransient(), Broyden(), Klement(), DFSane(), nothing) - - probs_nlls = NonlinearLeastSquaresProblem[] - nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), - (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), - ( - NonlinearFunction{true}( - (du, u, p) -> du[1] = u[1] * u[1] - p, resid_prototype = zeros(1)), - [0.1, 0.0]), - ( - NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), - resid_prototype = zeros(4)), - [0.1, 0.1])) - for (fn, u0) in nlfuncs - push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) - end - - nlls_algs = (LevenbergMarquardt(), GaussNewton(), TrustRegion(), - LevenbergMarquardt(; linsolve = LUFactorization()), - GaussNewton(; linsolve = LUFactorization()), - TrustRegion(; linsolve = LUFactorization()), nothing) - - @compile_workload begin - @sync begin - for T in (Float32, Float64), (fn, u0) in nlfuncs - Threads.@spawn NonlinearProblem(fn, T.(u0), T(2)) - end - for (fn, u0) in nlfuncs - Threads.@spawn NonlinearLeastSquaresProblem(fn, u0, 2.0) - end - for prob in probs_nls, alg in nls_algs - Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) - end - for prob in probs_nlls, alg in nlls_algs - Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) - end - end - end -end +# @setup_workload begin +# nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), +# (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) +# probs_nls = NonlinearProblem[] +# for (fn, u0) in nlfuncs +# push!(probs_nls, NonlinearProblem(fn, u0, 2.0)) +# end + +# nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), +# PseudoTransient(), Broyden(), Klement(), DFSane(), nothing) + +# probs_nlls = NonlinearLeastSquaresProblem[] +# nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), +# (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), +# ( +# NonlinearFunction{true}( +# (du, u, p) -> du[1] = u[1] * u[1] - p, resid_prototype = zeros(1)), +# [0.1, 0.0]), +# ( +# NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), +# resid_prototype = zeros(4)), +# [0.1, 0.1])) +# for (fn, u0) in nlfuncs +# push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) +# end + +# nlls_algs = (LevenbergMarquardt(), GaussNewton(), TrustRegion(), +# LevenbergMarquardt(; linsolve = LUFactorization()), +# GaussNewton(; linsolve = LUFactorization()), +# TrustRegion(; linsolve = LUFactorization()), nothing) + +# @compile_workload begin +# @sync begin +# for T in (Float32, Float64), (fn, u0) in nlfuncs +# Threads.@spawn NonlinearProblem(fn, T.(u0), T(2)) +# end +# for (fn, u0) in nlfuncs +# Threads.@spawn NonlinearLeastSquaresProblem(fn, u0, 2.0) +# end +# for prob in probs_nls, 
alg in nls_algs +# Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) +# end +# for prob in probs_nlls, alg in nlls_algs +# Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) +# end +# end +# end +# end # Core Algorithms export NewtonRaphson, PseudoTransient, Klement, Broyden, LimitedMemoryBroyden, DFSane diff --git a/src/abstract_types.jl b/src/abstract_types.jl index 2fe2fd071..0bb2e13d2 100644 --- a/src/abstract_types.jl +++ b/src/abstract_types.jl @@ -106,22 +106,6 @@ function last_step_accepted(cache::AbstractDescentCache) return true end -""" - AbstractNonlinearSolveLineSearchAlgorithm - -Abstract Type for all Line Search Algorithms used in NonlinearSolve.jl. - -### `__internal_init` specification - -```julia -__internal_init( - prob::AbstractNonlinearProblem, alg::AbstractNonlinearSolveLineSearchAlgorithm, f::F, - fu, u, p, args...; internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} --> -AbstractNonlinearSolveLineSearchCache -``` -""" -abstract type AbstractNonlinearSolveLineSearchAlgorithm end - """ AbstractNonlinearSolveLineSearchCache @@ -512,9 +496,9 @@ SciMLBase.isinplace(::AbstractNonlinearSolveJacobianCache{iip}) where {iip} = ii abstract type AbstractNonlinearSolveTraceLevel end # Default Printing -for aType in (AbstractTrustRegionMethod, AbstractNonlinearSolveLineSearchAlgorithm, - AbstractResetCondition, AbstractApproximateJacobianUpdateRule, - AbstractDampingFunction, AbstractNonlinearSolveExtensionAlgorithm) +for aType in (AbstractTrustRegionMethod, AbstractResetCondition, + AbstractApproximateJacobianUpdateRule, AbstractDampingFunction, + AbstractNonlinearSolveExtensionAlgorithm) @eval function Base.show(io::IO, alg::$(aType)) print(io, "$(nameof(typeof(alg)))()") end diff --git a/src/algorithms/dfsane.jl b/src/algorithms/dfsane.jl index b42544055..7a96cf3d2 100644 --- a/src/algorithms/dfsane.jl +++ b/src/algorithms/dfsane.jl @@ -19,8 +19,9 @@ For other keyword arguments, see [`RobustNonMonotoneLineSearch`](@ref). function DFSane(; σ_min = 1 // 10^10, σ_max = 1e10, σ_1 = 1, M::Int = 10, γ = 1 // 10^4, τ_min = 1 // 10, τ_max = 1 // 2, n_exp::Int = 2, max_inner_iterations::Int = 100, η_strategy::ETA = (fn_1, n, x_n, f_n) -> fn_1 / n^2) where {ETA} - linesearch = RobustNonMonotoneLineSearch(; - gamma = γ, sigma_1 = σ_1, M, tau_min = τ_min, tau_max = τ_max, - n_exp, η_strategy, maxiters = max_inner_iterations) + # linesearch = RobustNonMonotoneLineSearch(; + # gamma = γ, sigma_1 = σ_1, M, tau_min = τ_min, tau_max = τ_max, + # n_exp, η_strategy, maxiters = max_inner_iterations) + linesearch = NoLineSearch() return GeneralizedDFSane{:DFSane}(linesearch, σ_min, σ_max, nothing) end diff --git a/src/algorithms/klement.jl b/src/algorithms/klement.jl index 66daec1b7..ef21fa2ab 100644 --- a/src/algorithms/klement.jl +++ b/src/algorithms/klement.jl @@ -27,12 +27,12 @@ over this. function Klement(; max_resets::Int = 100, linsolve = nothing, alpha = nothing, linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing, init_jacobian::Val{IJ} = Val(:identity)) where {IJ} - if !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) - Base.depwarn( - "Passing in a `LineSearches.jl` algorithm directly is deprecated. \ - Please use `LineSearchesJL` instead.", :Klement) - linesearch = LineSearchesJL(; method = linesearch) - end + # if !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + # Base.depwarn( + # "Passing in a `LineSearches.jl` algorithm directly is deprecated. 
\ + # Please use `LineSearchesJL` instead.", :Klement) + # linesearch = LineSearchesJL(; method = linesearch) + # end if IJ === :identity initialization = IdentityInitialization(alpha, DiagonalStructure()) diff --git a/src/algorithms/pseudo_transient.jl b/src/algorithms/pseudo_transient.jl index fda39a3b9..0da85dd94 100644 --- a/src/algorithms/pseudo_transient.jl +++ b/src/algorithms/pseudo_transient.jl @@ -1,7 +1,6 @@ """ PseudoTransient(; concrete_jac = nothing, linsolve = nothing, - linesearch::AbstractNonlinearSolveLineSearchAlgorithm = NoLineSearch(), - precs = DEFAULT_PRECS, autodiff = nothing) + linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing) An implementation of PseudoTransient Method [coffey2003pseudotransient](@cite) that is used to solve steady state problems in an accelerated manner. It uses an adaptive time-stepping @@ -16,8 +15,8 @@ This implementation specifically uses "switched evolution relaxation" you are going to need more iterations to converge but it can be more stable. """ function PseudoTransient(; concrete_jac = nothing, linsolve = nothing, - linesearch::AbstractNonlinearSolveLineSearchAlgorithm = NoLineSearch(), - precs = DEFAULT_PRECS, autodiff = nothing, alpha_initial = 1e-3) + linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing, + alpha_initial = 1e-3) descent = DampedNewtonDescent(; linsolve, precs, initial_damping = alpha_initial, damping_fn = SwitchedEvolutionRelaxation()) return GeneralizedFirstOrderAlgorithm(; diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl index e61f34032..24a0a32ed 100644 --- a/src/core/approximate_jacobian.jl +++ b/src/core/approximate_jacobian.jl @@ -59,12 +59,12 @@ function ApproximateJacobianSolveAlgorithm{concrete_jac, name}(; linesearch = missing, trustregion = missing, descent, update_rule, reinit_rule, initialization, max_resets::Int = typemax(Int), max_shrink_times::Int = typemax(Int)) where {concrete_jac, name} - if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) - Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ - Please use `LineSearchesJL` instead.", - :GeneralizedFirstOrderAlgorithm) - linesearch = LineSearchesJL(; method = linesearch) - end + # if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + # Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ + # Please use `LineSearchesJL` instead.", + # :GeneralizedFirstOrderAlgorithm) + # linesearch = LineSearchesJL(; method = linesearch) + # end return ApproximateJacobianSolveAlgorithm{concrete_jac, name}( linesearch, trustregion, descent, update_rule, reinit_rule, max_resets, max_shrink_times, initialization) diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl index 38c3786f5..a798b2064 100644 --- a/src/core/generalized_first_order.jl +++ b/src/core/generalized_first_order.jl @@ -66,12 +66,12 @@ function GeneralizedFirstOrderAlgorithm{concrete_jac, name}(; jacobian_ad !== nothing && ADTypes.mode(jacobian_ad) isa ADTypes.ReverseMode, jacobian_ad, nothing)) - if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) - Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. 
\ - Please use `LineSearchesJL` instead.", - :GeneralizedFirstOrderAlgorithm) - linesearch = LineSearchesJL(; method = linesearch) - end + # if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + # Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ + # Please use `LineSearchesJL` instead.", + # :GeneralizedFirstOrderAlgorithm) + # linesearch = LineSearchesJL(; method = linesearch) + # end return GeneralizedFirstOrderAlgorithm{concrete_jac, name}( linesearch, trustregion, descent, max_shrink_times, @@ -199,8 +199,11 @@ function SciMLBase.__init( if alg.linesearch !== missing supports_line_search(alg.descent) || error("Line Search not supported by \ $(alg.descent).") - linesearch_cache = __internal_init( - prob, alg.linesearch, f, fu, u, p; stats, internalnorm, kwargs...) + linesearch_ad = alg.forward_ad === nothing ? + (alg.reverse_ad === nothing ? alg.jacobian_ad : + alg.reverse_ad) : alg.forward_ad + linesearch_cache = init( + prob, alg.linesearch, fu, u; stats, autodiff = linesearch_ad, kwargs...) GB = :LineSearch end @@ -264,8 +267,9 @@ function __step!(cache::GeneralizedFirstOrderAlgorithmCache{iip, GB}; cache.make_new_jacobian = true if GB === :LineSearch @static_timeit cache.timer "linesearch" begin - linesearch_failed, α = __internal_solve!( - cache.linesearch_cache, cache.u, δu) + linesearch_sol = solve!(cache.linesearch_cache, cache.u, δu) + linesearch_failed = !SciMLBase.successful_retcode(linesearch_sol.retcode) + α = linesearch_sol.step_size end if linesearch_failed cache.retcode = ReturnCode.InternalLineSearchFailed diff --git a/src/core/spectral_methods.jl b/src/core/spectral_methods.jl index 85e58c1a3..cf67a321b 100644 --- a/src/core/spectral_methods.jl +++ b/src/core/spectral_methods.jl @@ -9,9 +9,8 @@ Method. ### Arguments - - `linesearch`: Globalization using a Line Search Method. This needs to follow the - [`NonlinearSolve.AbstractNonlinearSolveLineSearchAlgorithm`](@ref) interface. This - is not optional currently, but that restriction might be lifted in the future. + - `linesearch`: Globalization using a Line Search Method. This is not optional currently, + but that restriction might be lifted in the future. - `σ_min`: The minimum spectral parameter allowed. This is used to ensure that the spectral parameter is not too small. - `σ_max`: The maximum spectral parameter allowed. This is used to ensure that the diff --git a/src/globalization/line_search.jl b/src/globalization/line_search.jl index 5de4610b6..f02f9fd28 100644 --- a/src/globalization/line_search.jl +++ b/src/globalization/line_search.jl @@ -1,439 +1,239 @@ -""" - NoLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm - -Don't perform a line search. Just return the initial step length of `1`. -""" -struct NoLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm end - -@concrete mutable struct NoLineSearchCache <: AbstractNonlinearSolveLineSearchCache - α -end - -function __internal_init(prob::AbstractNonlinearProblem, alg::NoLineSearch, - f::F, fu, u, p, args...; kwargs...) where {F} - return NoLineSearchCache(promote_type(eltype(fu), eltype(u))(true)) -end - -reinit_cache!(cache::NoLineSearchCache, args...; p = cache.p, kwargs...) = nothing - -__internal_solve!(cache::NoLineSearchCache, u, du) = false, cache.α - -""" - LineSearchesJL(; method = LineSearches.Static(), autodiff = nothing, α = true) - -Wrapper over algorithms from -[LineSearches.jl](https://github.com/JuliaNLSolvers/LineSearches.jl/). 
Allows automatic -construction of the objective functions for the line search algorithms utilizing automatic -differentiation for fast Vector Jacobian Products. - -### Arguments - - - `method`: the line search algorithm to use. Defaults to - `method = LineSearches.Static()`, which means that the step size is fixed to the value - of `alpha`. - - `autodiff`: the automatic differentiation backend to use for the line search. Using a - reverse mode automatic differentiation backend if recommended. - - `α`: the initial step size to use. Defaults to `true` (which is equivalent to `1`). -""" -@concrete struct LineSearchesJL <: AbstractNonlinearSolveLineSearchAlgorithm - method - initial_alpha - autodiff -end - -function Base.show(io::IO, alg::LineSearchesJL) - str = "$(nameof(typeof(alg)))(" - modifiers = String[] - __is_present(alg.autodiff) && - push!(modifiers, "autodiff = $(nameof(typeof(alg.autodiff)))()") - alg.initial_alpha != true && push!(modifiers, "initial_alpha = $(alg.initial_alpha)") - push!(modifiers, "method = $(nameof(typeof(alg.method)))()") - print(io, str, join(modifiers, ", "), ")") -end - LineSearchesJL(method; kwargs...) = LineSearchesJL(; method, kwargs...) function LineSearchesJL(; method = LineSearches.Static(), autodiff = nothing, α = true) - if method isa LineSearchesJL # Prevent breaking old code - return LineSearchesJL(method.method, α, autodiff) - end - - if method isa AbstractNonlinearSolveLineSearchAlgorithm - Base.depwarn("Passing a native NonlinearSolve line search algorithm to \ - `LineSearchesJL` or `LineSearch` is deprecated. Pass the method \ - directly instead.", - :LineSearchesJL) - return method - end - return LineSearchesJL(method, α, autodiff) -end - -Base.@deprecate_binding LineSearch LineSearchesJL true - -Static(args...; kwargs...) = LineSearchesJL(LineSearches.Static(args...; kwargs...)) -HagerZhang(args...; kwargs...) = LineSearchesJL(LineSearches.HagerZhang(args...; kwargs...)) -function MoreThuente(args...; kwargs...) - return LineSearchesJL(LineSearches.MoreThuente(args...; kwargs...)) -end -function BackTracking(args...; kwargs...) - return LineSearchesJL(LineSearches.BackTracking(args...; kwargs...)) -end -function StrongWolfe(args...; kwargs...) - return LineSearchesJL(LineSearches.StrongWolfe(args...; kwargs...)) -end - -# Wrapper over LineSearches.jl algorithms -@concrete mutable struct LineSearchesJLCache <: AbstractNonlinearSolveLineSearchCache - f - p - ϕ - dϕ - ϕdϕ - method - alpha - deriv_op - u_cache - fu_cache - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::LineSearchesJL, f::F, fu, u, p, - args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - T = promote_type(eltype(fu), eltype(u)) - if u isa Number - autodiff = get_concrete_forward_ad(alg.autodiff, prob; check_forward_mode = true) - if !(autodiff isa AutoForwardDiff || - autodiff isa AutoPolyesterForwardDiff || - autodiff isa AutoFiniteDiff) - autodiff = AutoFiniteDiff() - # Other cases are not properly supported so we fallback to finite differencing - @warn "Scalar AD is supported only for AutoForwardDiff and AutoFiniteDiff. \ - Detected $(autodiff). Falling back to AutoFiniteDiff." - end - deriv_op = @closure (du, u, fu, p) -> last(__value_derivative( - autodiff, Base.Fix2(f, p), u)) * - fu * - du - else - # Both forward and reverse AD can be used for line-search. - # We prefer forward AD for better performance, however, reverse AD is also supported if user explicitly requests it. - # 1. 
If jvp is available, we use forward AD; - # 2. If vjp is available, we use reverse AD; - # 3. If reverse type is requested, we use reverse AD; - # 4. Finally, we use forward AD. - if alg.autodiff isa AutoFiniteDiff - deriv_op = nothing - elseif SciMLBase.has_jvp(f) - if isinplace(prob) - jvp_cache = zero(fu) - deriv_op = @closure (du, u, fu, p) -> begin - f.jvp(jvp_cache, du, u, p) - dot(fu, jvp_cache) - end - else - deriv_op = @closure (du, u, fu, p) -> dot(fu, f.jvp(du, u, p)) - end - elseif SciMLBase.has_vjp(f) - if isinplace(prob) - vjp_cache = zero(u) - deriv_op = @closure (du, u, fu, p) -> begin - f.vjp(vjp_cache, fu, u, p) - dot(du, vjp_cache) - end - else - deriv_op = @closure (du, u, fu, p) -> dot(du, f.vjp(fu, u, p)) - end - elseif alg.autodiff !== nothing && - ADTypes.mode(alg.autodiff) isa ADTypes.ReverseMode - autodiff = get_concrete_reverse_ad( - alg.autodiff, prob; check_reverse_mode = true) - vjp_op = VecJacOperator(prob, fu, u; autodiff) - if isinplace(prob) - vjp_cache = zero(u) - deriv_op = @closure (du, u, fu, p) -> dot(du, vjp_op(vjp_cache, fu, u, p)) - else - deriv_op = @closure (du, u, fu, p) -> dot(du, vjp_op(fu, u, p)) - end - else - autodiff = get_concrete_forward_ad( - alg.autodiff, prob; check_forward_mode = true) - jvp_op = JacVecOperator(prob, fu, u; autodiff) - if isinplace(prob) - jvp_cache = zero(fu) - deriv_op = @closure (du, u, fu, p) -> dot(fu, jvp_op(jvp_cache, du, u, p)) - else - deriv_op = @closure (du, u, fu, p) -> dot(fu, jvp_op(du, u, p)) - end - end - end - - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return @fastmath internalnorm(fu_cache)^2 / 2 - end - - dϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return deriv_op(du, u_cache, fu_cache, p) - end - - ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - deriv = deriv_op(du, u_cache, fu_cache, p) - obj = @fastmath internalnorm(fu_cache)^2 / 2 - return obj, deriv - end - - return LineSearchesJLCache(f, p, ϕ, dϕ, ϕdϕ, alg.method, T(alg.initial_alpha), - deriv_op, u_cache, fu_cache, stats) -end - -function __internal_solve!(cache::LineSearchesJLCache, u, du; kwargs...) 
- ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - if cache.deriv_op !== nothing - dϕ = @closure α -> cache.dϕ( - cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) - ϕdϕ = @closure α -> cache.ϕdϕ( - cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) - else - dϕ = @closure α -> FiniteDiff.finite_difference_derivative(ϕ, α) - ϕdϕ = @closure α -> (ϕ(α), FiniteDiff.finite_difference_derivative(ϕ, α)) - end - - ϕ₀, dϕ₀ = ϕdϕ(zero(eltype(u))) - - # Here we should be resetting the search direction for some algorithms especially - # if we start mixing in jacobian reuse and such - dϕ₀ ≥ 0 && return (true, one(eltype(u))) - - # We can technically reduce 1 axpy by reusing the returned value from cache.method - # but it's not worth the extra complexity - cache.alpha = first(cache.method(ϕ, dϕ, ϕdϕ, cache.alpha, ϕ₀, dϕ₀)) - return (false, cache.alpha) -end - -""" - RobustNonMonotoneLineSearch(; gamma = 1 // 10000, sigma_0 = 1, M::Int = 10, - tau_min = 1 // 10, tau_max = 1 // 2, n_exp::Int = 2, maxiters::Int = 100, - η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2) - -Robust NonMonotone Line Search is a derivative free line search method from DF Sane -[la2006spectral](@cite). - -### Keyword Arguments - - - `M`: The monotonicity of the algorithm is determined by a this positive integer. - A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm - of the function `f`. However, higher values allow for more flexibility in this reduction. - Despite this, the algorithm still ensures global convergence through the use of a - non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi - condition. Values in the range of 5 to 20 are usually sufficient, but some cases may - call for a higher value of `M`. The default setting is 10. - - `gamma`: a parameter that influences if a proposed step will be accepted. Higher value - of `gamma` will make the algorithm more restrictive in accepting steps. Defaults to - `1e-4`. - - `tau_min`: if a step is rejected the new step size will get multiplied by factor, and - this parameter is the minimum value of that factor. Defaults to `0.1`. - - `tau_max`: if a step is rejected the new step size will get multiplied by factor, and - this parameter is the maximum value of that factor. Defaults to `0.5`. - - `n_exp`: the exponent of the loss, i.e. ``f_n=||F(x_n)||^{n\\_exp}``. The paper uses - `n_exp ∈ {1, 2}`. Defaults to `2`. - - `η_strategy`: function to determine the parameter `η`, which enables growth - of ``||f_n||^2``. Called as `η = η_strategy(fn_1, n, x_n, f_n)` with `fn_1` initialized - as ``fn_1=||f(x_1)||^{n\\_exp}``, `n` is the iteration number, `x_n` is the current - `x`-value and `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``. - Defaults to ``fn_1 / n^2``. - - `maxiters`: the maximum number of iterations allowed for the inner loop of the - algorithm. Defaults to `100`. 
-""" -@kwdef @concrete struct RobustNonMonotoneLineSearch <: - AbstractNonlinearSolveLineSearchAlgorithm - gamma = 1 // 10000 - sigma_1 = 1 - M::Int = 10 - tau_min = 1 // 10 - tau_max = 1 // 2 - n_exp::Int = 2 - maxiters::Int = 100 - η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2 -end - -@concrete mutable struct RobustNonMonotoneLineSearchCache <: - AbstractNonlinearSolveLineSearchCache - f - p - ϕ - u_cache - fu_cache - internalnorm - maxiters::Int - history - γ - σ₁ - M::Int - τ_min - τ_max - nsteps::Int - η_strategy - n_exp::Int - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::RobustNonMonotoneLineSearch, f::F, fu, u, - p, args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - T = promote_type(eltype(fu), eltype(u)) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return internalnorm(fu_cache)^alg.n_exp - end - - fn₁ = internalnorm(fu)^alg.n_exp - η_strategy = @closure (n, xₙ, fₙ) -> alg.η_strategy(fn₁, n, xₙ, fₙ) - - return RobustNonMonotoneLineSearchCache( - f, p, ϕ, u_cache, fu_cache, internalnorm, alg.maxiters, - fill(fn₁, alg.M), T(alg.gamma), T(alg.sigma_1), alg.M, - T(alg.tau_min), T(alg.tau_max), 0, η_strategy, alg.n_exp, stats) -end - -function __internal_solve!(cache::RobustNonMonotoneLineSearchCache, u, du; kwargs...) - T = promote_type(eltype(u), eltype(du)) - ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - f_norm_old = ϕ(eltype(u)(0)) - α₊, α₋ = T(cache.σ₁), T(cache.σ₁) - η = cache.η_strategy(cache.nsteps, u, f_norm_old) - f_bar = maximum(cache.history) - - for k in 1:(cache.maxiters) - f_norm = ϕ(α₊) - f_norm ≤ f_bar + η - cache.γ * α₊ * f_norm_old && return (false, α₊) - - α₊ *= clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), - cache.τ_min, cache.τ_max) - - f_norm = ϕ(-α₋) - f_norm ≤ f_bar + η - cache.γ * α₋ * f_norm_old && return (false, -α₋) - - α₋ *= clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), - cache.τ_min, cache.τ_max) + Base.depwarn("`LineSearchesJL(...)` is deprecated. Please use `LineSearchesJL` from \ + LineSearch.jl instead.", + :LineSearchesJL) + + # Prevent breaking old code + method isa LineSearch.LineSearchesJL && + return LineSearch.LineSearchesJL(method.method, α, autodiff) + method isa AbstractLineSearchAlgorithm && return method + return LineSearch.LineSearchesJL(method, α, autodiff) +end + +for alg in (:Static, :HagerZhang, :MoreThuente, :BackTracking, :StrongWolfe) + depmsg = "`$(alg)(args...; kwargs...)` is deprecated. Please use `LineSearchesJL(; \ + method = $(alg)(args...; kwargs...))` instead." + @eval function $(alg)(args...; autodiff = nothing, initial_alpha = true, kwargs...) + Base.depwarn($(depmsg), $(alg)) + return LineSearch.LineSearchesJL(; + method = LineSearches.$(alg)(args...; kwargs...), autodiff, initial_alpha) end - - return true, T(cache.σ₁) end -function callback_into_cache!(topcache, cache::RobustNonMonotoneLineSearchCache, args...) 
- fu = get_fu(topcache) - cache.history[mod1(cache.nsteps, cache.M)] = cache.internalnorm(fu)^cache.n_exp - cache.nsteps += 1 - return -end - -""" - LiFukushimaLineSearch(; lambda_0 = 1, beta = 1 // 2, sigma_1 = 1 // 1000, - sigma_2 = 1 // 1000, eta = 1 // 10, nan_max_iter::Int = 5, maxiters::Int = 100) - -A derivative-free line search and global convergence of Broyden-like method for nonlinear -equations [li2000derivative](@cite). -""" -@kwdef @concrete struct LiFukushimaLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm - lambda_0 = 1 - beta = 1 // 2 - sigma_1 = 1 // 1000 - sigma_2 = 1 // 1000 - eta = 1 // 10 - rho = 9 // 10 - nan_max_iter::Int = 5 # TODO (breaking): Change this to nan_maxiters for uniformity - maxiters::Int = 100 -end - -@concrete mutable struct LiFukushimaLineSearchCache <: AbstractNonlinearSolveLineSearchCache - ϕ - f - p - internalnorm - u_cache - fu_cache - λ₀ - β - σ₁ - σ₂ - η - ρ - α - nan_maxiters::Int - maxiters::Int - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::LiFukushimaLineSearch, f::F, fu, u, p, - args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - T = promote_type(eltype(fu), eltype(u)) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return internalnorm(fu_cache) - end - - return LiFukushimaLineSearchCache( - ϕ, f, p, internalnorm, u_cache, fu_cache, T(alg.lambda_0), - T(alg.beta), T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), - T(alg.rho), T(true), alg.nan_max_iter, alg.maxiters, stats) -end - -function __internal_solve!(cache::LiFukushimaLineSearchCache, u, du; kwargs...) - T = promote_type(eltype(u), eltype(du)) - ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - - fx_norm = ϕ(T(0)) - - # Non-Blocking exit if the norm is NaN or Inf - !isfinite(fx_norm) && return (true, cache.α) - - # Early Terminate based on Eq. 2.7 - du_norm = cache.internalnorm(du) - fxλ_norm = ϕ(cache.α) - fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return (false, cache.α) - - λ₂, λ₁ = cache.λ₀, cache.λ₀ - fxλp_norm = ϕ(λ₂) - - if !isfinite(fxλp_norm) - nan_converged = false - for _ in 1:(cache.nan_maxiters) - λ₁, λ₂ = λ₂, cache.β * λ₂ - fxλp_norm = ϕ(λ₂) - nan_converged = isfinite(fxλp_norm) - nan_converged && break - end - nan_converged || return (true, cache.α) - end - - for i in 1:(cache.maxiters) - fxλp_norm = ϕ(λ₂) - converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2 - converged && return (false, λ₂) - λ₁, λ₂ = λ₂, cache.β * λ₂ - end - - return true, cache.α -end +# """ +# RobustNonMonotoneLineSearch(; gamma = 1 // 10000, sigma_0 = 1, M::Int = 10, +# tau_min = 1 // 10, tau_max = 1 // 2, n_exp::Int = 2, maxiters::Int = 100, +# η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2) + +# Robust NonMonotone Line Search is a derivative free line search method from DF Sane +# [la2006spectral](@cite). + +# ### Keyword Arguments + +# - `M`: The monotonicity of the algorithm is determined by a this positive integer. +# A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm +# of the function `f`. However, higher values allow for more flexibility in this reduction. +# Despite this, the algorithm still ensures global convergence through the use of a +# non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi +# condition. 
Values in the range of 5 to 20 are usually sufficient, but some cases may +# call for a higher value of `M`. The default setting is 10. +# - `gamma`: a parameter that influences if a proposed step will be accepted. Higher value +# of `gamma` will make the algorithm more restrictive in accepting steps. Defaults to +# `1e-4`. +# - `tau_min`: if a step is rejected the new step size will get multiplied by factor, and +# this parameter is the minimum value of that factor. Defaults to `0.1`. +# - `tau_max`: if a step is rejected the new step size will get multiplied by factor, and +# this parameter is the maximum value of that factor. Defaults to `0.5`. +# - `n_exp`: the exponent of the loss, i.e. ``f_n=||F(x_n)||^{n\\_exp}``. The paper uses +# `n_exp ∈ {1, 2}`. Defaults to `2`. +# - `η_strategy`: function to determine the parameter `η`, which enables growth +# of ``||f_n||^2``. Called as `η = η_strategy(fn_1, n, x_n, f_n)` with `fn_1` initialized +# as ``fn_1=||f(x_1)||^{n\\_exp}``, `n` is the iteration number, `x_n` is the current +# `x`-value and `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``. +# Defaults to ``fn_1 / n^2``. +# - `maxiters`: the maximum number of iterations allowed for the inner loop of the +# algorithm. Defaults to `100`. +# """ +# @kwdef @concrete struct RobustNonMonotoneLineSearch <: +# AbstractNonlinearSolveLineSearchAlgorithm +# gamma = 1 // 10000 +# sigma_1 = 1 +# M::Int = 10 +# tau_min = 1 // 10 +# tau_max = 1 // 2 +# n_exp::Int = 2 +# maxiters::Int = 100 +# η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2 +# end + +# @concrete mutable struct RobustNonMonotoneLineSearchCache <: +# AbstractNonlinearSolveLineSearchCache +# f +# p +# ϕ +# u_cache +# fu_cache +# internalnorm +# maxiters::Int +# history +# γ +# σ₁ +# M::Int +# τ_min +# τ_max +# nsteps::Int +# η_strategy +# n_exp::Int +# stats::NLStats +# end + +# function __internal_init( +# prob::AbstractNonlinearProblem, alg::RobustNonMonotoneLineSearch, f::F, fu, u, +# p, args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} +# @bb u_cache = similar(u) +# @bb fu_cache = similar(fu) +# T = promote_type(eltype(fu), eltype(u)) + +# ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin +# @bb @. u_cache = u + α * du +# fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) +# stats.nf += 1 +# return internalnorm(fu_cache)^alg.n_exp +# end + +# fn₁ = internalnorm(fu)^alg.n_exp +# η_strategy = @closure (n, xₙ, fₙ) -> alg.η_strategy(fn₁, n, xₙ, fₙ) + +# return RobustNonMonotoneLineSearchCache( +# f, p, ϕ, u_cache, fu_cache, internalnorm, alg.maxiters, +# fill(fn₁, alg.M), T(alg.gamma), T(alg.sigma_1), alg.M, +# T(alg.tau_min), T(alg.tau_max), 0, η_strategy, alg.n_exp, stats) +# end + +# function __internal_solve!(cache::RobustNonMonotoneLineSearchCache, u, du; kwargs...) 
+# T = promote_type(eltype(u), eltype(du)) +# ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) +# f_norm_old = ϕ(eltype(u)(0)) +# α₊, α₋ = T(cache.σ₁), T(cache.σ₁) +# η = cache.η_strategy(cache.nsteps, u, f_norm_old) +# f_bar = maximum(cache.history) + +# for k in 1:(cache.maxiters) +# f_norm = ϕ(α₊) +# f_norm ≤ f_bar + η - cache.γ * α₊ * f_norm_old && return (false, α₊) + +# α₊ *= clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), +# cache.τ_min, cache.τ_max) + +# f_norm = ϕ(-α₋) +# f_norm ≤ f_bar + η - cache.γ * α₋ * f_norm_old && return (false, -α₋) + +# α₋ *= clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), +# cache.τ_min, cache.τ_max) +# end + +# return true, T(cache.σ₁) +# end + +# function callback_into_cache!(topcache, cache::RobustNonMonotoneLineSearchCache, args...) +# fu = get_fu(topcache) +# cache.history[mod1(cache.nsteps, cache.M)] = cache.internalnorm(fu)^cache.n_exp +# cache.nsteps += 1 +# return +# end + +# """ +# LiFukushimaLineSearch(; lambda_0 = 1, beta = 1 // 2, sigma_1 = 1 // 1000, +# sigma_2 = 1 // 1000, eta = 1 // 10, nan_max_iter::Int = 5, maxiters::Int = 100) + +# A derivative-free line search and global convergence of Broyden-like method for nonlinear +# equations [li2000derivative](@cite). +# """ +# @kwdef @concrete struct LiFukushimaLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm +# lambda_0 = 1 +# beta = 1 // 2 +# sigma_1 = 1 // 1000 +# sigma_2 = 1 // 1000 +# eta = 1 // 10 +# rho = 9 // 10 +# nan_max_iter::Int = 5 # TODO (breaking): Change this to nan_maxiters for uniformity +# maxiters::Int = 100 +# end + +# @concrete mutable struct LiFukushimaLineSearchCache <: AbstractNonlinearSolveLineSearchCache +# ϕ +# f +# p +# internalnorm +# u_cache +# fu_cache +# λ₀ +# β +# σ₁ +# σ₂ +# η +# ρ +# α +# nan_maxiters::Int +# maxiters::Int +# stats::NLStats +# end + +# function __internal_init( +# prob::AbstractNonlinearProblem, alg::LiFukushimaLineSearch, f::F, fu, u, p, +# args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} +# @bb u_cache = similar(u) +# @bb fu_cache = similar(fu) +# T = promote_type(eltype(fu), eltype(u)) + +# ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin +# @bb @. u_cache = u + α * du +# fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) +# stats.nf += 1 +# return internalnorm(fu_cache) +# end + +# return LiFukushimaLineSearchCache( +# ϕ, f, p, internalnorm, u_cache, fu_cache, T(alg.lambda_0), +# T(alg.beta), T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), +# T(alg.rho), T(true), alg.nan_max_iter, alg.maxiters, stats) +# end + +# function __internal_solve!(cache::LiFukushimaLineSearchCache, u, du; kwargs...) +# T = promote_type(eltype(u), eltype(du)) +# ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) + +# fx_norm = ϕ(T(0)) + +# # Non-Blocking exit if the norm is NaN or Inf +# !isfinite(fx_norm) && return (true, cache.α) + +# # Early Terminate based on Eq. 
2.7 +# du_norm = cache.internalnorm(du) +# fxλ_norm = ϕ(cache.α) +# fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return (false, cache.α) + +# λ₂, λ₁ = cache.λ₀, cache.λ₀ +# fxλp_norm = ϕ(λ₂) + +# if !isfinite(fxλp_norm) +# nan_converged = false +# for _ in 1:(cache.nan_maxiters) +# λ₁, λ₂ = λ₂, cache.β * λ₂ +# fxλp_norm = ϕ(λ₂) +# nan_converged = isfinite(fxλp_norm) +# nan_converged && break +# end +# nan_converged || return (true, cache.α) +# end + +# for i in 1:(cache.maxiters) +# fxλp_norm = ϕ(λ₂) +# converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2 +# converged && return (false, λ₂) +# λ₁, λ₂ = λ₂, cache.β * λ₂ +# end + +# return true, cache.α +# end
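
---

Usage notes on the migrated interface (editor's sketches, not part of the patch).
The hunks in src/core/generalized_first_order.jl replace `__internal_init` and
`__internal_solve!` with the CommonSolve-style `init`/`solve!` from LineSearch.jl.
A minimal sketch of that lifecycle follows; the problem, the iterate `u`, and the
descent direction `δu` are hypothetical stand-ins, and the `stats`/`autodiff`
keywords that the solver forwards are elided here:

    using SciMLBase
    using LineSearch, LineSearches

    # Hypothetical problem; `fu` holds the residual at the current iterate `u`.
    prob = NonlinearProblem((u, p) -> u .* u .- p, [1.2, 1.1], 2.0)
    u = copy(prob.u0)
    fu = prob.f(u, prob.p)

    ls_alg = LineSearch.LineSearchesJL(; method = LineSearches.BackTracking())

    # `init` builds a reusable line-search cache (cf. `SciMLBase.__init` above).
    ls_cache = init(prob, ls_alg, fu, u)

    # `solve!` returns a solution object whose `retcode` and `step_size` replace
    # the old `(linesearch_failed, α)` tuple (cf. `__step!` above).
    δu = -fu  # stand-in for the solver-computed descent direction
    ls_sol = solve!(ls_cache, u, δu)
    if SciMLBase.successful_retcode(ls_sol.retcode)
        @. u += ls_sol.step_size * δu
    end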
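The deprecation shims added in src/globalization/line_search.jl keep the old
`Static`/`HagerZhang`/`MoreThuente`/`BackTracking`/`StrongWolfe` constructors
working while routing them to `LineSearch.LineSearchesJL`. A before/after
sketch, assuming `NewtonRaphson` keeps its `linesearch` keyword (untouched by
this patch):

    using NonlinearSolve
    using LineSearch, LineSearches

    # Deprecated spelling: still works, but hits the @eval depwarn shims above.
    # alg = NewtonRaphson(; linesearch = BackTracking())

    # Preferred spelling after this patch (kwargs mirror the shim defaults):
    ls = LineSearch.LineSearchesJL(; method = LineSearches.BackTracking(),
        autodiff = nothing, initial_alpha = true)
    alg = NewtonRaphson(; linesearch = ls)
    sol = solve(NonlinearProblem((u, p) -> u .* u .- p, [1.2, 1.1], 2.0), alg)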
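Finally, `NoLineSearch` now comes from LineSearch.jl (see the imports hunk in
src/NonlinearSolve.jl); per the removed docstring it simply returns a fixed
step length of 1. It remains the default for Klement and PseudoTransient and
is the interim choice for DFSane while RobustNonMonotoneLineSearch stays
commented out pending its port:

    using NonlinearSolve
    using LineSearch: NoLineSearch

    # Explicitly opting out of globalization, e.g. for PseudoTransient:
    alg = PseudoTransient(; linesearch = NoLineSearch())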