From 1e4cfde6d7aac4439f77893ce11668df2b57cc69 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 20 Oct 2023 18:41:50 -0400 Subject: [PATCH] Broyden supports matrices --- src/broyden.jl | 18 +++++++++--------- src/levenberg.jl | 8 +++++--- src/pseudotransient.jl | 15 +++------------ src/raphson.jl | 2 +- src/trustRegion.jl | 3 ++- src/utils.jl | 6 +++--- test/matrix_resizing.jl | 12 +++++++----- 7 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/broyden.jl b/src/broyden.jl index 121a2ca71..1f8bc73bc 100644 --- a/src/broyden.jl +++ b/src/broyden.jl @@ -58,7 +58,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde fu = evaluate_f(prob, u) J⁻¹ = __init_identity_jacobian(u, fu) return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu), - zero(fu), p, J⁻¹, zero(fu'), _mutable_zero(u), false, 0, alg.max_resets, + zero(fu), p, J⁻¹, zero(_vec(fu)'), _mutable_zero(u), false, 0, alg.max_resets, maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0), init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip))) end @@ -67,7 +67,7 @@ function perform_step!(cache::GeneralBroydenCache{true}) @unpack f, p, du, fu, fu2, dfu, u, J⁻¹, J⁻¹df, J⁻¹₂ = cache T = eltype(u) - mul!(du, J⁻¹, -fu) + mul!(_vec(du), J⁻¹, -_vec(fu)) α = perform_linesearch!(cache.lscache, u, du) axpy!(α, du, u) f(fu2, u, p) @@ -85,10 +85,10 @@ function perform_step!(cache::GeneralBroydenCache{true}) J⁻¹[diagind(J⁻¹)] .= T(1) cache.resets += 1 else - mul!(J⁻¹df, J⁻¹, dfu) - mul!(J⁻¹₂, du', J⁻¹) + mul!(_vec(J⁻¹df), J⁻¹, _vec(dfu)) + mul!(J⁻¹₂, _vec(du)', J⁻¹) du .= (du .- J⁻¹df) ./ (dot(du, J⁻¹df) .+ T(1e-5)) - mul!(J⁻¹, reshape(du, :, 1), J⁻¹₂, 1, 1) + mul!(J⁻¹, _vec(du), J⁻¹₂, 1, 1) end fu .= fu2 @@ -99,7 +99,7 @@ function perform_step!(cache::GeneralBroydenCache{false}) @unpack f, p = cache T = eltype(cache.u) - cache.du = cache.J⁻¹ * -cache.fu + cache.du = _restructure(cache.du, cache.J⁻¹ * -_vec(cache.fu)) α = perform_linesearch!(cache.lscache, cache.u, cache.du) cache.u = cache.u .+ α * cache.du cache.fu2 = f(cache.u, p) @@ -119,10 +119,10 @@ function perform_step!(cache::GeneralBroydenCache{false}) cache.J⁻¹ = J⁻¹ cache.resets += 1 else - cache.J⁻¹df = cache.J⁻¹ * cache.dfu - cache.J⁻¹₂ = cache.du' * cache.J⁻¹ + cache.J⁻¹df = _restructure(cache.J⁻¹df, cache.J⁻¹ * _vec(cache.dfu)) + cache.J⁻¹₂ = _vec(cache.du)' * cache.J⁻¹ cache.du = (cache.du .- cache.J⁻¹df) ./ (dot(cache.du, cache.J⁻¹df) .+ T(1e-5)) - cache.J⁻¹ = cache.J⁻¹ .+ cache.du * cache.J⁻¹₂ + cache.J⁻¹ = cache.J⁻¹ .+ _vec(cache.du) * cache.J⁻¹₂ end cache.fu = cache.fu2 diff --git a/src/levenberg.jl b/src/levenberg.jl index 016a12f31..fbfcc6a9a 100644 --- a/src/levenberg.jl +++ b/src/levenberg.jl @@ -301,7 +301,9 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) else linres = dolinsolve(alg.precs, linsolve; b = _mutable(_vec(J' * #((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))), - _vec(((2 / h) .* ((_vec(f(u .+ h .* _restructure(u,v), p)) .- _vec(fu1)) ./ h .- J * _vec(v)))))), + _vec(((2 / h) .* + ((_vec(f(u .+ h .* _restructure(u, v), p)) .- + _vec(fu1)) ./ h .- J * _vec(v)))))), linu = _vec(cache.a), p, reltol = cache.abstol) cache.linsolve = linres.cache end @@ -311,7 +313,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) # Require acceptable steps to satisfy the following condition. norm_v = norm(v) if 2 * norm(cache.a) ≤ α_geodesic * norm_v - cache.δ = _restructure(cache.δ,_vec(v) .+ _vec(cache.a) ./ 2) + cache.δ = _restructure(cache.δ, _vec(v) .+ _vec(cache.a) ./ 2) @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache fu_new = f(u .+ δ, p) cache.stats.nf += 1 @@ -327,7 +329,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false}) return nothing end cache.fu1 = fu_new - cache.v_old = _restructure(cache.v_old,v) + cache.v_old = _restructure(cache.v_old, v) cache.norm_v_old = norm_v cache.loss_old = loss cache.λ_factor = 1 / cache.damping_decrease_factor diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index 8e70f2350..a041c258c 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -29,10 +29,6 @@ SIAM Journal on Scientific Computing,25, 553-569.](https://doi.org/10.1137/S1064 [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). - `alpha_initial` : the initial pseudo time step. it defaults to 1e-3. If it is small, you are going to need more iterations to converge but it can be more stable. - - - - """ @concrete struct PseudoTransient{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD} ad::AD @@ -92,19 +88,14 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::PseudoTransi else fu1 = _mutable(f(u, p)) end - uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, - f, - u, - p, - Val(iip); + uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip); linsolve_kwargs) alpha = convert(eltype(u), alg.alpha_initial) res_norm = internalnorm(fu1) return PseudoTransientCache{iip}(f, alg, u, fu1, fu2, du, p, alpha, res_norm, uf, - linsolve, J, - jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob, - NLStats(1, 0, 0, 0, 0)) + linsolve, J, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, + prob, NLStats(1, 0, 0, 0, 0)) end function perform_step!(cache::PseudoTransientCache{true}) diff --git a/src/raphson.jl b/src/raphson.jl index a3d9bbc6d..db9e9e322 100644 --- a/src/raphson.jl +++ b/src/raphson.jl @@ -97,7 +97,7 @@ function perform_step!(cache::NewtonRaphsonCache{true}) # Line Search α = perform_linesearch!(cache.lscache, u, du) - axpy!(α, du, u) + axpy!(-α, du, u) f(cache.fu1, u, p) cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true) diff --git a/src/trustRegion.jl b/src/trustRegion.jl index f97b63637..8094dc09c 100644 --- a/src/trustRegion.jl +++ b/src/trustRegion.jl @@ -419,7 +419,8 @@ function trust_region_step!(cache::TrustRegionCache) cache.loss_new = get_loss(fu_new) # Compute the ratio of the actual reduction to the predicted reduction. - cache.r = -(loss - cache.loss_new) / (dot(_vec(du), _vec(g)) + dot(_vec(du), H, _vec(du)) / 2) + cache.r = -(loss - cache.loss_new) / + (dot(_vec(du), _vec(g)) + dot(_vec(du), H, _vec(du)) / 2) @unpack r = cache if radius_update_scheme === RadiusUpdateSchemes.Simple diff --git a/src/utils.jl b/src/utils.jl index d33dc292d..7c6413937 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -74,8 +74,8 @@ end @inline _vec(v::Number) = v @inline _vec(v::AbstractVector) = v -@inline _restructure(y,x) = restructure(y,x) -@inline _restructure(y::Number,x::Number) = x +@inline _restructure(y, x) = restructure(y, x) +@inline _restructure(y::Number, x::Number) = x DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing @@ -220,4 +220,4 @@ end function __init_identity_jacobian(u::StaticArray, fu) return convert(MArray{Tuple{length(fu), length(u)}}, Matrix{eltype(u)}(I, length(fu), length(u))) -end \ No newline at end of file +end diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl index f8ad423e5..1aa60fa05 100644 --- a/test/matrix_resizing.jl +++ b/test/matrix_resizing.jl @@ -1,21 +1,23 @@ using NonlinearSolve, Test ff(u, p) = u .* u .- p -u0 = rand(2,2) +u0 = rand(2, 2) p = 2.0 vecprob = NonlinearProblem(ff, vec(u0), p) prob = NonlinearProblem(ff, u0, p) -for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg()) +for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u end fiip(du, u, p) = (du .= u .* u .- p) -u0 = rand(2,2) +u0 = rand(2, 2) p = 2.0 vecprob = NonlinearProblem(fiip, vec(u0), p) prob = NonlinearProblem(fiip, u0, p) -for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg()) +for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), + RobustMultiNewton(), FastShortcutNonlinearPolyalg(), GeneralBroyden()) @test vec(solve(prob, alg).u) == solve(vecprob, alg).u -end \ No newline at end of file +end