From 5485267e42827131310562a0480d0c5a31a8e35a Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 24 Sep 2024 14:05:50 -0400 Subject: [PATCH] refactor: migrate to LineSearch.jl --- Project.toml | 2 + docs/src/devdocs/internal_interfaces.md | 7 - src/NonlinearSolve.jl | 98 ++-- src/abstract_types.jl | 22 +- src/algorithms/dfsane.jl | 7 +- src/algorithms/klement.jl | 12 +- src/algorithms/pseudo_transient.jl | 7 +- src/core/approximate_jacobian.jl | 12 +- src/core/generalized_first_order.jl | 24 +- src/core/spectral_methods.jl | 5 +- src/globalization/line_search.jl | 668 +++++++++--------------- 11 files changed, 324 insertions(+), 540 deletions(-) diff --git a/Project.toml b/Project.toml index ab35df1e2..d0c7e9da2 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" +LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" @@ -78,6 +79,7 @@ Hwloc = "3" InteractiveUtils = "<0.0.1, 1" LazyArrays = "1.8.2, 2" LeastSquaresOptim = "0.8.5" +LineSearch = "0.1" LineSearches = "7.2" LinearAlgebra = "1.10" LinearSolve = "2.30" diff --git a/docs/src/devdocs/internal_interfaces.md b/docs/src/devdocs/internal_interfaces.md index 91b9562a8..53e20d754 100644 --- a/docs/src/devdocs/internal_interfaces.md +++ b/docs/src/devdocs/internal_interfaces.md @@ -38,13 +38,6 @@ NonlinearSolve.AbstractDampingFunction NonlinearSolve.AbstractDampingFunctionCache ``` -## Line Search - -```@docs -NonlinearSolve.AbstractNonlinearSolveLineSearchAlgorithm -NonlinearSolve.AbstractNonlinearSolveLineSearchCache -``` - ## Trust Region ```@docs diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 2e420a761..90387814a 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -30,6 +30,8 @@ using LazyArrays: LazyArrays, ApplyArray, cache using LinearAlgebra: LinearAlgebra, ColumnNorm, Diagonal, I, LowerTriangular, Symmetric, UpperTriangular, axpy!, cond, diag, diagind, dot, issuccess, istril, istriu, lu, mul!, norm, pinv, tril!, triu! 
+using LineSearch: LineSearch, AbstractLineSearchAlgorithm, AbstractLineSearchCache, + NoLineSearch using LineSearches: LineSearches using LinearSolve: LinearSolve, LUFactorization, QRFactorization, ComposePreconditioner, InvPreconditioner, needs_concrete_A, AbstractFactorization, @@ -103,54 +105,54 @@ include("algorithms/extension_algs.jl") include("utils.jl") include("default.jl") -@setup_workload begin - nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), - (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) - probs_nls = NonlinearProblem[] - for (fn, u0) in nlfuncs - push!(probs_nls, NonlinearProblem(fn, u0, 2.0)) - end - - nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), - PseudoTransient(), Broyden(), Klement(), DFSane(), nothing) - - probs_nlls = NonlinearLeastSquaresProblem[] - nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), - (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), - ( - NonlinearFunction{true}( - (du, u, p) -> du[1] = u[1] * u[1] - p, resid_prototype = zeros(1)), - [0.1, 0.0]), - ( - NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), - resid_prototype = zeros(4)), - [0.1, 0.1])) - for (fn, u0) in nlfuncs - push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) - end - - nlls_algs = (LevenbergMarquardt(), GaussNewton(), TrustRegion(), - LevenbergMarquardt(; linsolve = LUFactorization()), - GaussNewton(; linsolve = LUFactorization()), - TrustRegion(; linsolve = LUFactorization()), nothing) - - @compile_workload begin - @sync begin - for T in (Float32, Float64), (fn, u0) in nlfuncs - Threads.@spawn NonlinearProblem(fn, T.(u0), T(2)) - end - for (fn, u0) in nlfuncs - Threads.@spawn NonlinearLeastSquaresProblem(fn, u0, 2.0) - end - for prob in probs_nls, alg in nls_algs - Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) - end - for prob in probs_nlls, alg in nlls_algs - Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) - end - end - end -end +# @setup_workload begin +# nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1), +# (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1])) +# probs_nls = NonlinearProblem[] +# for (fn, u0) in nlfuncs +# push!(probs_nls, NonlinearProblem(fn, u0, 2.0)) +# end + +# nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), +# PseudoTransient(), Broyden(), Klement(), DFSane(), nothing) + +# probs_nlls = NonlinearLeastSquaresProblem[] +# nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]), +# (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]), +# ( +# NonlinearFunction{true}( +# (du, u, p) -> du[1] = u[1] * u[1] - p, resid_prototype = zeros(1)), +# [0.1, 0.0]), +# ( +# NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p), +# resid_prototype = zeros(4)), +# [0.1, 0.1])) +# for (fn, u0) in nlfuncs +# push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0)) +# end + +# nlls_algs = (LevenbergMarquardt(), GaussNewton(), TrustRegion(), +# LevenbergMarquardt(; linsolve = LUFactorization()), +# GaussNewton(; linsolve = LUFactorization()), +# TrustRegion(; linsolve = LUFactorization()), nothing) + +# @compile_workload begin +# @sync begin +# for T in (Float32, Float64), (fn, u0) in nlfuncs +# Threads.@spawn NonlinearProblem(fn, T.(u0), T(2)) +# end +# for (fn, u0) in nlfuncs +# Threads.@spawn NonlinearLeastSquaresProblem(fn, u0, 2.0) +# end +# for prob in probs_nls, 
alg in nls_algs +# Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) +# end +# for prob in probs_nlls, alg in nlls_algs +# Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false) +# end +# end +# end +# end # Core Algorithms export NewtonRaphson, PseudoTransient, Klement, Broyden, LimitedMemoryBroyden, DFSane diff --git a/src/abstract_types.jl b/src/abstract_types.jl index 2fe2fd071..0bb2e13d2 100644 --- a/src/abstract_types.jl +++ b/src/abstract_types.jl @@ -106,22 +106,6 @@ function last_step_accepted(cache::AbstractDescentCache) return true end -""" - AbstractNonlinearSolveLineSearchAlgorithm - -Abstract Type for all Line Search Algorithms used in NonlinearSolve.jl. - -### `__internal_init` specification - -```julia -__internal_init( - prob::AbstractNonlinearProblem, alg::AbstractNonlinearSolveLineSearchAlgorithm, f::F, - fu, u, p, args...; internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} --> -AbstractNonlinearSolveLineSearchCache -``` -""" -abstract type AbstractNonlinearSolveLineSearchAlgorithm end - """ AbstractNonlinearSolveLineSearchCache @@ -512,9 +496,9 @@ SciMLBase.isinplace(::AbstractNonlinearSolveJacobianCache{iip}) where {iip} = ii abstract type AbstractNonlinearSolveTraceLevel end # Default Printing -for aType in (AbstractTrustRegionMethod, AbstractNonlinearSolveLineSearchAlgorithm, - AbstractResetCondition, AbstractApproximateJacobianUpdateRule, - AbstractDampingFunction, AbstractNonlinearSolveExtensionAlgorithm) +for aType in (AbstractTrustRegionMethod, AbstractResetCondition, + AbstractApproximateJacobianUpdateRule, AbstractDampingFunction, + AbstractNonlinearSolveExtensionAlgorithm) @eval function Base.show(io::IO, alg::$(aType)) print(io, "$(nameof(typeof(alg)))()") end diff --git a/src/algorithms/dfsane.jl b/src/algorithms/dfsane.jl index b42544055..7a96cf3d2 100644 --- a/src/algorithms/dfsane.jl +++ b/src/algorithms/dfsane.jl @@ -19,8 +19,9 @@ For other keyword arguments, see [`RobustNonMonotoneLineSearch`](@ref). function DFSane(; σ_min = 1 // 10^10, σ_max = 1e10, σ_1 = 1, M::Int = 10, γ = 1 // 10^4, τ_min = 1 // 10, τ_max = 1 // 2, n_exp::Int = 2, max_inner_iterations::Int = 100, η_strategy::ETA = (fn_1, n, x_n, f_n) -> fn_1 / n^2) where {ETA} - linesearch = RobustNonMonotoneLineSearch(; - gamma = γ, sigma_1 = σ_1, M, tau_min = τ_min, tau_max = τ_max, - n_exp, η_strategy, maxiters = max_inner_iterations) + # linesearch = RobustNonMonotoneLineSearch(; + # gamma = γ, sigma_1 = σ_1, M, tau_min = τ_min, tau_max = τ_max, + # n_exp, η_strategy, maxiters = max_inner_iterations) + linesearch = NoLineSearch() return GeneralizedDFSane{:DFSane}(linesearch, σ_min, σ_max, nothing) end diff --git a/src/algorithms/klement.jl b/src/algorithms/klement.jl index 66daec1b7..ef21fa2ab 100644 --- a/src/algorithms/klement.jl +++ b/src/algorithms/klement.jl @@ -27,12 +27,12 @@ over this. function Klement(; max_resets::Int = 100, linsolve = nothing, alpha = nothing, linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing, init_jacobian::Val{IJ} = Val(:identity)) where {IJ} - if !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) - Base.depwarn( - "Passing in a `LineSearches.jl` algorithm directly is deprecated. \ - Please use `LineSearchesJL` instead.", :Klement) - linesearch = LineSearchesJL(; method = linesearch) - end + # if !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + # Base.depwarn( + # "Passing in a `LineSearches.jl` algorithm directly is deprecated. 
\ + # Please use `LineSearchesJL` instead.", :Klement) + # linesearch = LineSearchesJL(; method = linesearch) + # end if IJ === :identity initialization = IdentityInitialization(alpha, DiagonalStructure()) diff --git a/src/algorithms/pseudo_transient.jl b/src/algorithms/pseudo_transient.jl index fda39a3b9..0da85dd94 100644 --- a/src/algorithms/pseudo_transient.jl +++ b/src/algorithms/pseudo_transient.jl @@ -1,7 +1,6 @@ """ PseudoTransient(; concrete_jac = nothing, linsolve = nothing, - linesearch::AbstractNonlinearSolveLineSearchAlgorithm = NoLineSearch(), - precs = DEFAULT_PRECS, autodiff = nothing) + linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing) An implementation of PseudoTransient Method [coffey2003pseudotransient](@cite) that is used to solve steady state problems in an accelerated manner. It uses an adaptive time-stepping @@ -16,8 +15,8 @@ This implementation specifically uses "switched evolution relaxation" you are going to need more iterations to converge but it can be more stable. """ function PseudoTransient(; concrete_jac = nothing, linsolve = nothing, - linesearch::AbstractNonlinearSolveLineSearchAlgorithm = NoLineSearch(), - precs = DEFAULT_PRECS, autodiff = nothing, alpha_initial = 1e-3) + linesearch = NoLineSearch(), precs = DEFAULT_PRECS, autodiff = nothing, + alpha_initial = 1e-3) descent = DampedNewtonDescent(; linsolve, precs, initial_damping = alpha_initial, damping_fn = SwitchedEvolutionRelaxation()) return GeneralizedFirstOrderAlgorithm(; diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl index e61f34032..24a0a32ed 100644 --- a/src/core/approximate_jacobian.jl +++ b/src/core/approximate_jacobian.jl @@ -59,12 +59,12 @@ function ApproximateJacobianSolveAlgorithm{concrete_jac, name}(; linesearch = missing, trustregion = missing, descent, update_rule, reinit_rule, initialization, max_resets::Int = typemax(Int), max_shrink_times::Int = typemax(Int)) where {concrete_jac, name} - if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) - Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ - Please use `LineSearchesJL` instead.", - :GeneralizedFirstOrderAlgorithm) - linesearch = LineSearchesJL(; method = linesearch) - end + # if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + # Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ + # Please use `LineSearchesJL` instead.", + # :GeneralizedFirstOrderAlgorithm) + # linesearch = LineSearchesJL(; method = linesearch) + # end return ApproximateJacobianSolveAlgorithm{concrete_jac, name}( linesearch, trustregion, descent, update_rule, reinit_rule, max_resets, max_shrink_times, initialization) diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl index 38c3786f5..a798b2064 100644 --- a/src/core/generalized_first_order.jl +++ b/src/core/generalized_first_order.jl @@ -66,12 +66,12 @@ function GeneralizedFirstOrderAlgorithm{concrete_jac, name}(; jacobian_ad !== nothing && ADTypes.mode(jacobian_ad) isa ADTypes.ReverseMode, jacobian_ad, nothing)) - if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) - Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. 
\ - Please use `LineSearchesJL` instead.", - :GeneralizedFirstOrderAlgorithm) - linesearch = LineSearchesJL(; method = linesearch) - end + # if linesearch !== missing && !(linesearch isa AbstractNonlinearSolveLineSearchAlgorithm) + # Base.depwarn("Passing in a `LineSearches.jl` algorithm directly is deprecated. \ + # Please use `LineSearchesJL` instead.", + # :GeneralizedFirstOrderAlgorithm) + # linesearch = LineSearchesJL(; method = linesearch) + # end return GeneralizedFirstOrderAlgorithm{concrete_jac, name}( linesearch, trustregion, descent, max_shrink_times, @@ -199,8 +199,11 @@ function SciMLBase.__init( if alg.linesearch !== missing supports_line_search(alg.descent) || error("Line Search not supported by \ $(alg.descent).") - linesearch_cache = __internal_init( - prob, alg.linesearch, f, fu, u, p; stats, internalnorm, kwargs...) + linesearch_ad = alg.forward_ad === nothing ? + (alg.reverse_ad === nothing ? alg.jacobian_ad : + alg.reverse_ad) : alg.forward_ad + linesearch_cache = init( + prob, alg.linesearch, fu, u; stats, autodiff = linesearch_ad, kwargs...) GB = :LineSearch end @@ -264,8 +267,9 @@ function __step!(cache::GeneralizedFirstOrderAlgorithmCache{iip, GB}; cache.make_new_jacobian = true if GB === :LineSearch @static_timeit cache.timer "linesearch" begin - linesearch_failed, α = __internal_solve!( - cache.linesearch_cache, cache.u, δu) + linesearch_sol = solve!(cache.linesearch_cache, cache.u, δu) + linesearch_failed = !SciMLBase.successful_retcode(linesearch_sol.retcode) + α = linesearch_sol.step_size end if linesearch_failed cache.retcode = ReturnCode.InternalLineSearchFailed diff --git a/src/core/spectral_methods.jl b/src/core/spectral_methods.jl index 85e58c1a3..cf67a321b 100644 --- a/src/core/spectral_methods.jl +++ b/src/core/spectral_methods.jl @@ -9,9 +9,8 @@ Method. ### Arguments - - `linesearch`: Globalization using a Line Search Method. This needs to follow the - [`NonlinearSolve.AbstractNonlinearSolveLineSearchAlgorithm`](@ref) interface. This - is not optional currently, but that restriction might be lifted in the future. + - `linesearch`: Globalization using a Line Search Method. This is not optional currently, + but that restriction might be lifted in the future. - `σ_min`: The minimum spectral parameter allowed. This is used to ensure that the spectral parameter is not too small. - `σ_max`: The maximum spectral parameter allowed. This is used to ensure that the diff --git a/src/globalization/line_search.jl b/src/globalization/line_search.jl index 5de4610b6..f02f9fd28 100644 --- a/src/globalization/line_search.jl +++ b/src/globalization/line_search.jl @@ -1,439 +1,239 @@ -""" - NoLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm - -Don't perform a line search. Just return the initial step length of `1`. -""" -struct NoLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm end - -@concrete mutable struct NoLineSearchCache <: AbstractNonlinearSolveLineSearchCache - α -end - -function __internal_init(prob::AbstractNonlinearProblem, alg::NoLineSearch, - f::F, fu, u, p, args...; kwargs...) where {F} - return NoLineSearchCache(promote_type(eltype(fu), eltype(u))(true)) -end - -reinit_cache!(cache::NoLineSearchCache, args...; p = cache.p, kwargs...) = nothing - -__internal_solve!(cache::NoLineSearchCache, u, du) = false, cache.α - -""" - LineSearchesJL(; method = LineSearches.Static(), autodiff = nothing, α = true) - -Wrapper over algorithms from -[LineSearches.jl](https://github.com/JuliaNLSolvers/LineSearches.jl/). 
Allows automatic -construction of the objective functions for the line search algorithms utilizing automatic -differentiation for fast Vector Jacobian Products. - -### Arguments - - - `method`: the line search algorithm to use. Defaults to - `method = LineSearches.Static()`, which means that the step size is fixed to the value - of `alpha`. - - `autodiff`: the automatic differentiation backend to use for the line search. Using a - reverse mode automatic differentiation backend if recommended. - - `α`: the initial step size to use. Defaults to `true` (which is equivalent to `1`). -""" -@concrete struct LineSearchesJL <: AbstractNonlinearSolveLineSearchAlgorithm - method - initial_alpha - autodiff -end - -function Base.show(io::IO, alg::LineSearchesJL) - str = "$(nameof(typeof(alg)))(" - modifiers = String[] - __is_present(alg.autodiff) && - push!(modifiers, "autodiff = $(nameof(typeof(alg.autodiff)))()") - alg.initial_alpha != true && push!(modifiers, "initial_alpha = $(alg.initial_alpha)") - push!(modifiers, "method = $(nameof(typeof(alg.method)))()") - print(io, str, join(modifiers, ", "), ")") -end - LineSearchesJL(method; kwargs...) = LineSearchesJL(; method, kwargs...) function LineSearchesJL(; method = LineSearches.Static(), autodiff = nothing, α = true) - if method isa LineSearchesJL # Prevent breaking old code - return LineSearchesJL(method.method, α, autodiff) - end - - if method isa AbstractNonlinearSolveLineSearchAlgorithm - Base.depwarn("Passing a native NonlinearSolve line search algorithm to \ - `LineSearchesJL` or `LineSearch` is deprecated. Pass the method \ - directly instead.", - :LineSearchesJL) - return method - end - return LineSearchesJL(method, α, autodiff) -end - -Base.@deprecate_binding LineSearch LineSearchesJL true - -Static(args...; kwargs...) = LineSearchesJL(LineSearches.Static(args...; kwargs...)) -HagerZhang(args...; kwargs...) = LineSearchesJL(LineSearches.HagerZhang(args...; kwargs...)) -function MoreThuente(args...; kwargs...) - return LineSearchesJL(LineSearches.MoreThuente(args...; kwargs...)) -end -function BackTracking(args...; kwargs...) - return LineSearchesJL(LineSearches.BackTracking(args...; kwargs...)) -end -function StrongWolfe(args...; kwargs...) - return LineSearchesJL(LineSearches.StrongWolfe(args...; kwargs...)) -end - -# Wrapper over LineSearches.jl algorithms -@concrete mutable struct LineSearchesJLCache <: AbstractNonlinearSolveLineSearchCache - f - p - ϕ - dϕ - ϕdϕ - method - alpha - deriv_op - u_cache - fu_cache - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::LineSearchesJL, f::F, fu, u, p, - args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - T = promote_type(eltype(fu), eltype(u)) - if u isa Number - autodiff = get_concrete_forward_ad(alg.autodiff, prob; check_forward_mode = true) - if !(autodiff isa AutoForwardDiff || - autodiff isa AutoPolyesterForwardDiff || - autodiff isa AutoFiniteDiff) - autodiff = AutoFiniteDiff() - # Other cases are not properly supported so we fallback to finite differencing - @warn "Scalar AD is supported only for AutoForwardDiff and AutoFiniteDiff. \ - Detected $(autodiff). Falling back to AutoFiniteDiff." - end - deriv_op = @closure (du, u, fu, p) -> last(__value_derivative( - autodiff, Base.Fix2(f, p), u)) * - fu * - du - else - # Both forward and reverse AD can be used for line-search. - # We prefer forward AD for better performance, however, reverse AD is also supported if user explicitly requests it. - # 1. 
If jvp is available, we use forward AD; - # 2. If vjp is available, we use reverse AD; - # 3. If reverse type is requested, we use reverse AD; - # 4. Finally, we use forward AD. - if alg.autodiff isa AutoFiniteDiff - deriv_op = nothing - elseif SciMLBase.has_jvp(f) - if isinplace(prob) - jvp_cache = zero(fu) - deriv_op = @closure (du, u, fu, p) -> begin - f.jvp(jvp_cache, du, u, p) - dot(fu, jvp_cache) - end - else - deriv_op = @closure (du, u, fu, p) -> dot(fu, f.jvp(du, u, p)) - end - elseif SciMLBase.has_vjp(f) - if isinplace(prob) - vjp_cache = zero(u) - deriv_op = @closure (du, u, fu, p) -> begin - f.vjp(vjp_cache, fu, u, p) - dot(du, vjp_cache) - end - else - deriv_op = @closure (du, u, fu, p) -> dot(du, f.vjp(fu, u, p)) - end - elseif alg.autodiff !== nothing && - ADTypes.mode(alg.autodiff) isa ADTypes.ReverseMode - autodiff = get_concrete_reverse_ad( - alg.autodiff, prob; check_reverse_mode = true) - vjp_op = VecJacOperator(prob, fu, u; autodiff) - if isinplace(prob) - vjp_cache = zero(u) - deriv_op = @closure (du, u, fu, p) -> dot(du, vjp_op(vjp_cache, fu, u, p)) - else - deriv_op = @closure (du, u, fu, p) -> dot(du, vjp_op(fu, u, p)) - end - else - autodiff = get_concrete_forward_ad( - alg.autodiff, prob; check_forward_mode = true) - jvp_op = JacVecOperator(prob, fu, u; autodiff) - if isinplace(prob) - jvp_cache = zero(fu) - deriv_op = @closure (du, u, fu, p) -> dot(fu, jvp_op(jvp_cache, du, u, p)) - else - deriv_op = @closure (du, u, fu, p) -> dot(fu, jvp_op(du, u, p)) - end - end - end - - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return @fastmath internalnorm(fu_cache)^2 / 2 - end - - dϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return deriv_op(du, u_cache, fu_cache, p) - end - - ϕdϕ = @closure (f, p, u, du, α, u_cache, fu_cache, deriv_op) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - deriv = deriv_op(du, u_cache, fu_cache, p) - obj = @fastmath internalnorm(fu_cache)^2 / 2 - return obj, deriv - end - - return LineSearchesJLCache(f, p, ϕ, dϕ, ϕdϕ, alg.method, T(alg.initial_alpha), - deriv_op, u_cache, fu_cache, stats) -end - -function __internal_solve!(cache::LineSearchesJLCache, u, du; kwargs...) 
- ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - if cache.deriv_op !== nothing - dϕ = @closure α -> cache.dϕ( - cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) - ϕdϕ = @closure α -> cache.ϕdϕ( - cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache, cache.deriv_op) - else - dϕ = @closure α -> FiniteDiff.finite_difference_derivative(ϕ, α) - ϕdϕ = @closure α -> (ϕ(α), FiniteDiff.finite_difference_derivative(ϕ, α)) - end - - ϕ₀, dϕ₀ = ϕdϕ(zero(eltype(u))) - - # Here we should be resetting the search direction for some algorithms especially - # if we start mixing in jacobian reuse and such - dϕ₀ ≥ 0 && return (true, one(eltype(u))) - - # We can technically reduce 1 axpy by reusing the returned value from cache.method - # but it's not worth the extra complexity - cache.alpha = first(cache.method(ϕ, dϕ, ϕdϕ, cache.alpha, ϕ₀, dϕ₀)) - return (false, cache.alpha) -end - -""" - RobustNonMonotoneLineSearch(; gamma = 1 // 10000, sigma_0 = 1, M::Int = 10, - tau_min = 1 // 10, tau_max = 1 // 2, n_exp::Int = 2, maxiters::Int = 100, - η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2) - -Robust NonMonotone Line Search is a derivative free line search method from DF Sane -[la2006spectral](@cite). - -### Keyword Arguments - - - `M`: The monotonicity of the algorithm is determined by a this positive integer. - A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm - of the function `f`. However, higher values allow for more flexibility in this reduction. - Despite this, the algorithm still ensures global convergence through the use of a - non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi - condition. Values in the range of 5 to 20 are usually sufficient, but some cases may - call for a higher value of `M`. The default setting is 10. - - `gamma`: a parameter that influences if a proposed step will be accepted. Higher value - of `gamma` will make the algorithm more restrictive in accepting steps. Defaults to - `1e-4`. - - `tau_min`: if a step is rejected the new step size will get multiplied by factor, and - this parameter is the minimum value of that factor. Defaults to `0.1`. - - `tau_max`: if a step is rejected the new step size will get multiplied by factor, and - this parameter is the maximum value of that factor. Defaults to `0.5`. - - `n_exp`: the exponent of the loss, i.e. ``f_n=||F(x_n)||^{n\\_exp}``. The paper uses - `n_exp ∈ {1, 2}`. Defaults to `2`. - - `η_strategy`: function to determine the parameter `η`, which enables growth - of ``||f_n||^2``. Called as `η = η_strategy(fn_1, n, x_n, f_n)` with `fn_1` initialized - as ``fn_1=||f(x_1)||^{n\\_exp}``, `n` is the iteration number, `x_n` is the current - `x`-value and `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``. - Defaults to ``fn_1 / n^2``. - - `maxiters`: the maximum number of iterations allowed for the inner loop of the - algorithm. Defaults to `100`. 
-""" -@kwdef @concrete struct RobustNonMonotoneLineSearch <: - AbstractNonlinearSolveLineSearchAlgorithm - gamma = 1 // 10000 - sigma_1 = 1 - M::Int = 10 - tau_min = 1 // 10 - tau_max = 1 // 2 - n_exp::Int = 2 - maxiters::Int = 100 - η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2 -end - -@concrete mutable struct RobustNonMonotoneLineSearchCache <: - AbstractNonlinearSolveLineSearchCache - f - p - ϕ - u_cache - fu_cache - internalnorm - maxiters::Int - history - γ - σ₁ - M::Int - τ_min - τ_max - nsteps::Int - η_strategy - n_exp::Int - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::RobustNonMonotoneLineSearch, f::F, fu, u, - p, args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - T = promote_type(eltype(fu), eltype(u)) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return internalnorm(fu_cache)^alg.n_exp - end - - fn₁ = internalnorm(fu)^alg.n_exp - η_strategy = @closure (n, xₙ, fₙ) -> alg.η_strategy(fn₁, n, xₙ, fₙ) - - return RobustNonMonotoneLineSearchCache( - f, p, ϕ, u_cache, fu_cache, internalnorm, alg.maxiters, - fill(fn₁, alg.M), T(alg.gamma), T(alg.sigma_1), alg.M, - T(alg.tau_min), T(alg.tau_max), 0, η_strategy, alg.n_exp, stats) -end - -function __internal_solve!(cache::RobustNonMonotoneLineSearchCache, u, du; kwargs...) - T = promote_type(eltype(u), eltype(du)) - ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - f_norm_old = ϕ(eltype(u)(0)) - α₊, α₋ = T(cache.σ₁), T(cache.σ₁) - η = cache.η_strategy(cache.nsteps, u, f_norm_old) - f_bar = maximum(cache.history) - - for k in 1:(cache.maxiters) - f_norm = ϕ(α₊) - f_norm ≤ f_bar + η - cache.γ * α₊ * f_norm_old && return (false, α₊) - - α₊ *= clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), - cache.τ_min, cache.τ_max) - - f_norm = ϕ(-α₋) - f_norm ≤ f_bar + η - cache.γ * α₋ * f_norm_old && return (false, -α₋) - - α₋ *= clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), - cache.τ_min, cache.τ_max) + Base.depwarn("`LineSearchesJL(...)` is deprecated. Please use `LineSearchesJL` from \ + LineSearch.jl instead.", + :LineSearchesJL) + + # Prevent breaking old code + method isa LineSearch.LineSearchesJL && + return LineSearch.LineSearchesJL(method.method, α, autodiff) + method isa AbstractLineSearchAlgorithm && return method + return LineSearch.LineSearchesJL(method, α, autodiff) +end + +for alg in (:Static, :HagerZhang, :MoreThuente, :BackTracking, :StrongWolfe) + depmsg = "`$(alg)(args...; kwargs...)` is deprecated. Please use `LineSearchesJL(; \ + method = $(alg)(args...; kwargs...))` instead." + @eval function $(alg)(args...; autodiff = nothing, initial_alpha = true, kwargs...) + Base.depwarn($(depmsg), $(alg)) + return LineSearch.LineSearchesJL(; + method = LineSearches.$(alg)(args...; kwargs...), autodiff, initial_alpha) end - - return true, T(cache.σ₁) end -function callback_into_cache!(topcache, cache::RobustNonMonotoneLineSearchCache, args...) 
- fu = get_fu(topcache) - cache.history[mod1(cache.nsteps, cache.M)] = cache.internalnorm(fu)^cache.n_exp - cache.nsteps += 1 - return -end - -""" - LiFukushimaLineSearch(; lambda_0 = 1, beta = 1 // 2, sigma_1 = 1 // 1000, - sigma_2 = 1 // 1000, eta = 1 // 10, nan_max_iter::Int = 5, maxiters::Int = 100) - -A derivative-free line search and global convergence of Broyden-like method for nonlinear -equations [li2000derivative](@cite). -""" -@kwdef @concrete struct LiFukushimaLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm - lambda_0 = 1 - beta = 1 // 2 - sigma_1 = 1 // 1000 - sigma_2 = 1 // 1000 - eta = 1 // 10 - rho = 9 // 10 - nan_max_iter::Int = 5 # TODO (breaking): Change this to nan_maxiters for uniformity - maxiters::Int = 100 -end - -@concrete mutable struct LiFukushimaLineSearchCache <: AbstractNonlinearSolveLineSearchCache - ϕ - f - p - internalnorm - u_cache - fu_cache - λ₀ - β - σ₁ - σ₂ - η - ρ - α - nan_maxiters::Int - maxiters::Int - stats::NLStats -end - -function __internal_init( - prob::AbstractNonlinearProblem, alg::LiFukushimaLineSearch, f::F, fu, u, p, - args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} - @bb u_cache = similar(u) - @bb fu_cache = similar(fu) - T = promote_type(eltype(fu), eltype(u)) - - ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin - @bb @. u_cache = u + α * du - fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) - stats.nf += 1 - return internalnorm(fu_cache) - end - - return LiFukushimaLineSearchCache( - ϕ, f, p, internalnorm, u_cache, fu_cache, T(alg.lambda_0), - T(alg.beta), T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), - T(alg.rho), T(true), alg.nan_max_iter, alg.maxiters, stats) -end - -function __internal_solve!(cache::LiFukushimaLineSearchCache, u, du; kwargs...) - T = promote_type(eltype(u), eltype(du)) - ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) - - fx_norm = ϕ(T(0)) - - # Non-Blocking exit if the norm is NaN or Inf - !isfinite(fx_norm) && return (true, cache.α) - - # Early Terminate based on Eq. 2.7 - du_norm = cache.internalnorm(du) - fxλ_norm = ϕ(cache.α) - fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return (false, cache.α) - - λ₂, λ₁ = cache.λ₀, cache.λ₀ - fxλp_norm = ϕ(λ₂) - - if !isfinite(fxλp_norm) - nan_converged = false - for _ in 1:(cache.nan_maxiters) - λ₁, λ₂ = λ₂, cache.β * λ₂ - fxλp_norm = ϕ(λ₂) - nan_converged = isfinite(fxλp_norm) - nan_converged && break - end - nan_converged || return (true, cache.α) - end - - for i in 1:(cache.maxiters) - fxλp_norm = ϕ(λ₂) - converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2 - converged && return (false, λ₂) - λ₁, λ₂ = λ₂, cache.β * λ₂ - end - - return true, cache.α -end +# """ +# RobustNonMonotoneLineSearch(; gamma = 1 // 10000, sigma_0 = 1, M::Int = 10, +# tau_min = 1 // 10, tau_max = 1 // 2, n_exp::Int = 2, maxiters::Int = 100, +# η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2) + +# Robust NonMonotone Line Search is a derivative free line search method from DF Sane +# [la2006spectral](@cite). + +# ### Keyword Arguments + +# - `M`: The monotonicity of the algorithm is determined by a this positive integer. +# A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm +# of the function `f`. However, higher values allow for more flexibility in this reduction. +# Despite this, the algorithm still ensures global convergence through the use of a +# non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi +# condition. 
Values in the range of 5 to 20 are usually sufficient, but some cases may +# call for a higher value of `M`. The default setting is 10. +# - `gamma`: a parameter that influences if a proposed step will be accepted. Higher value +# of `gamma` will make the algorithm more restrictive in accepting steps. Defaults to +# `1e-4`. +# - `tau_min`: if a step is rejected the new step size will get multiplied by factor, and +# this parameter is the minimum value of that factor. Defaults to `0.1`. +# - `tau_max`: if a step is rejected the new step size will get multiplied by factor, and +# this parameter is the maximum value of that factor. Defaults to `0.5`. +# - `n_exp`: the exponent of the loss, i.e. ``f_n=||F(x_n)||^{n\\_exp}``. The paper uses +# `n_exp ∈ {1, 2}`. Defaults to `2`. +# - `η_strategy`: function to determine the parameter `η`, which enables growth +# of ``||f_n||^2``. Called as `η = η_strategy(fn_1, n, x_n, f_n)` with `fn_1` initialized +# as ``fn_1=||f(x_1)||^{n\\_exp}``, `n` is the iteration number, `x_n` is the current +# `x`-value and `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``. +# Defaults to ``fn_1 / n^2``. +# - `maxiters`: the maximum number of iterations allowed for the inner loop of the +# algorithm. Defaults to `100`. +# """ +# @kwdef @concrete struct RobustNonMonotoneLineSearch <: +# AbstractNonlinearSolveLineSearchAlgorithm +# gamma = 1 // 10000 +# sigma_1 = 1 +# M::Int = 10 +# tau_min = 1 // 10 +# tau_max = 1 // 2 +# n_exp::Int = 2 +# maxiters::Int = 100 +# η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2 +# end + +# @concrete mutable struct RobustNonMonotoneLineSearchCache <: +# AbstractNonlinearSolveLineSearchCache +# f +# p +# ϕ +# u_cache +# fu_cache +# internalnorm +# maxiters::Int +# history +# γ +# σ₁ +# M::Int +# τ_min +# τ_max +# nsteps::Int +# η_strategy +# n_exp::Int +# stats::NLStats +# end + +# function __internal_init( +# prob::AbstractNonlinearProblem, alg::RobustNonMonotoneLineSearch, f::F, fu, u, +# p, args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} +# @bb u_cache = similar(u) +# @bb fu_cache = similar(fu) +# T = promote_type(eltype(fu), eltype(u)) + +# ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin +# @bb @. u_cache = u + α * du +# fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) +# stats.nf += 1 +# return internalnorm(fu_cache)^alg.n_exp +# end + +# fn₁ = internalnorm(fu)^alg.n_exp +# η_strategy = @closure (n, xₙ, fₙ) -> alg.η_strategy(fn₁, n, xₙ, fₙ) + +# return RobustNonMonotoneLineSearchCache( +# f, p, ϕ, u_cache, fu_cache, internalnorm, alg.maxiters, +# fill(fn₁, alg.M), T(alg.gamma), T(alg.sigma_1), alg.M, +# T(alg.tau_min), T(alg.tau_max), 0, η_strategy, alg.n_exp, stats) +# end + +# function __internal_solve!(cache::RobustNonMonotoneLineSearchCache, u, du; kwargs...) 
+# T = promote_type(eltype(u), eltype(du)) +# ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) +# f_norm_old = ϕ(eltype(u)(0)) +# α₊, α₋ = T(cache.σ₁), T(cache.σ₁) +# η = cache.η_strategy(cache.nsteps, u, f_norm_old) +# f_bar = maximum(cache.history) + +# for k in 1:(cache.maxiters) +# f_norm = ϕ(α₊) +# f_norm ≤ f_bar + η - cache.γ * α₊ * f_norm_old && return (false, α₊) + +# α₊ *= clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old), +# cache.τ_min, cache.τ_max) + +# f_norm = ϕ(-α₋) +# f_norm ≤ f_bar + η - cache.γ * α₋ * f_norm_old && return (false, -α₋) + +# α₋ *= clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old), +# cache.τ_min, cache.τ_max) +# end + +# return true, T(cache.σ₁) +# end + +# function callback_into_cache!(topcache, cache::RobustNonMonotoneLineSearchCache, args...) +# fu = get_fu(topcache) +# cache.history[mod1(cache.nsteps, cache.M)] = cache.internalnorm(fu)^cache.n_exp +# cache.nsteps += 1 +# return +# end + +# """ +# LiFukushimaLineSearch(; lambda_0 = 1, beta = 1 // 2, sigma_1 = 1 // 1000, +# sigma_2 = 1 // 1000, eta = 1 // 10, nan_max_iter::Int = 5, maxiters::Int = 100) + +# A derivative-free line search and global convergence of Broyden-like method for nonlinear +# equations [li2000derivative](@cite). +# """ +# @kwdef @concrete struct LiFukushimaLineSearch <: AbstractNonlinearSolveLineSearchAlgorithm +# lambda_0 = 1 +# beta = 1 // 2 +# sigma_1 = 1 // 1000 +# sigma_2 = 1 // 1000 +# eta = 1 // 10 +# rho = 9 // 10 +# nan_max_iter::Int = 5 # TODO (breaking): Change this to nan_maxiters for uniformity +# maxiters::Int = 100 +# end + +# @concrete mutable struct LiFukushimaLineSearchCache <: AbstractNonlinearSolveLineSearchCache +# ϕ +# f +# p +# internalnorm +# u_cache +# fu_cache +# λ₀ +# β +# σ₁ +# σ₂ +# η +# ρ +# α +# nan_maxiters::Int +# maxiters::Int +# stats::NLStats +# end + +# function __internal_init( +# prob::AbstractNonlinearProblem, alg::LiFukushimaLineSearch, f::F, fu, u, p, +# args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN} +# @bb u_cache = similar(u) +# @bb fu_cache = similar(fu) +# T = promote_type(eltype(fu), eltype(u)) + +# ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin +# @bb @. u_cache = u + α * du +# fu_cache = evaluate_f!!(f, fu_cache, u_cache, p) +# stats.nf += 1 +# return internalnorm(fu_cache) +# end + +# return LiFukushimaLineSearchCache( +# ϕ, f, p, internalnorm, u_cache, fu_cache, T(alg.lambda_0), +# T(alg.beta), T(alg.sigma_1), T(alg.sigma_2), T(alg.eta), +# T(alg.rho), T(true), alg.nan_max_iter, alg.maxiters, stats) +# end + +# function __internal_solve!(cache::LiFukushimaLineSearchCache, u, du; kwargs...) +# T = promote_type(eltype(u), eltype(du)) +# ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache) + +# fx_norm = ϕ(T(0)) + +# # Non-Blocking exit if the norm is NaN or Inf +# !isfinite(fx_norm) && return (true, cache.α) + +# # Early Terminate based on Eq. 
2.7 +# du_norm = cache.internalnorm(du) +# fxλ_norm = ϕ(cache.α) +# fxλ_norm ≤ cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return (false, cache.α) + +# λ₂, λ₁ = cache.λ₀, cache.λ₀ +# fxλp_norm = ϕ(λ₂) + +# if !isfinite(fxλp_norm) +# nan_converged = false +# for _ in 1:(cache.nan_maxiters) +# λ₁, λ₂ = λ₂, cache.β * λ₂ +# fxλp_norm = ϕ(λ₂) +# nan_converged = isfinite(fxλp_norm) +# nan_converged && break +# end +# nan_converged || return (true, cache.α) +# end + +# for i in 1:(cache.maxiters) +# fxλp_norm = ϕ(λ₂) +# converged = fxλp_norm ≤ (1 + cache.η) * fx_norm - cache.σ₁ * λ₂^2 * du_norm^2 +# converged && return (false, λ₂) +# λ₁, λ₂ = λ₂, cache.β * λ₂ +# end + +# return true, cache.α +# end
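
---

Usage notes on the migrated interface (editor's sketches, not part of the patch).
The hunks in src/core/generalized_first_order.jl replace `__internal_init` and
`__internal_solve!` with the CommonSolve-style `init`/`solve!` from LineSearch.jl.
A minimal sketch of that lifecycle follows; the problem, the iterate `u`, and the
descent direction `δu` are hypothetical stand-ins, and the `stats`/`autodiff`
keywords that the solver forwards are elided here:

    using SciMLBase
    using LineSearch, LineSearches

    # Hypothetical problem; `fu` holds the residual at the current iterate `u`.
    prob = NonlinearProblem((u, p) -> u .* u .- p, [1.2, 1.1], 2.0)
    u = copy(prob.u0)
    fu = prob.f(u, prob.p)

    ls_alg = LineSearch.LineSearchesJL(; method = LineSearches.BackTracking())

    # `init` builds a reusable line-search cache (cf. `SciMLBase.__init` above).
    ls_cache = init(prob, ls_alg, fu, u)

    # `solve!` returns a solution object whose `retcode` and `step_size` replace
    # the old `(linesearch_failed, α)` tuple (cf. `__step!` above).
    δu = -fu  # stand-in for the solver-computed descent direction
    ls_sol = solve!(ls_cache, u, δu)
    if SciMLBase.successful_retcode(ls_sol.retcode)
        @. u += ls_sol.step_size * δu
    end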
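The deprecation shims added in src/globalization/line_search.jl keep the old
`Static`/`HagerZhang`/`MoreThuente`/`BackTracking`/`StrongWolfe` constructors
working while routing them to `LineSearch.LineSearchesJL`. A before/after
sketch, assuming `NewtonRaphson` keeps its `linesearch` keyword (untouched by
this patch):

    using NonlinearSolve
    using LineSearch, LineSearches

    # Deprecated spelling: still works, but hits the @eval depwarn shims above.
    # alg = NewtonRaphson(; linesearch = BackTracking())

    # Preferred spelling after this patch (kwargs mirror the shim defaults):
    ls = LineSearch.LineSearchesJL(; method = LineSearches.BackTracking(),
        autodiff = nothing, initial_alpha = true)
    alg = NewtonRaphson(; linesearch = ls)
    sol = solve(NonlinearProblem((u, p) -> u .* u .- p, [1.2, 1.1], 2.0), alg)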
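Finally, `NoLineSearch` now comes from LineSearch.jl (see the imports hunk in
src/NonlinearSolve.jl); per the removed docstring it simply returns a fixed
step length of 1. It remains the default for Klement and PseudoTransient and
is the interim choice for DFSane while RobustNonMonotoneLineSearch stays
commented out pending its port:

    using NonlinearSolve
    using LineSearch: NoLineSearch

    # Explicitly opting out of globalization, e.g. for PseudoTransient:
    alg = PseudoTransient(; linesearch = NoLineSearch())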