diff --git a/src/composite_algs.jl b/src/composite_algs.jl index 34a64aa502..462fa083f3 100644 --- a/src/composite_algs.jl +++ b/src/composite_algs.jl @@ -15,7 +15,7 @@ function is_stiff(integrator, alg, ntol, stol, is_stiffalg) stiffness = abs(eigen_est * dt / alg_stability_size(alg)) # `abs` here is just for safety tol = is_stiffalg ? stol : ntol os = oneunit(stiffness) - bool = stiffness > os * tol + bool = !(stiffness <= os * tol) if !bool integrator.alg.choice_function.successive_switches += 1 @@ -111,7 +111,7 @@ current_nonstiff(current) = ifelse(current <= NUM_NONSTIFF, current, current - N function DefaultODEAlgorithm(; lazy = true, stiffalgfirst = false, kwargs...) nonstiff = (Tsit5(), Vern7(lazy = lazy)) stiff = (Rosenbrock23(; kwargs...), Rodas5P(; kwargs...), FBDF(; kwargs...), - FBDF(; linsolve = LinearSolve.KrylovJL_GMRES())) + FBDF(; linsolve = LinearSolve.KrylovJL_GMRES(), kwargs...)) AutoAlgSwitch(nonstiff, stiff; stiffalgfirst) end diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl index 82fb00fc6d..b9380db407 100644 --- a/src/derivative_utils.jl +++ b/src/derivative_utils.jl @@ -140,7 +140,6 @@ function calc_J!(J, integrator, cache, next_step::Bool = false) if !(p isa DiffEqBase.NullParameters) uf.p = p end - jacobian!(J, uf, uprev, du1, integrator, jac_config) end end @@ -294,7 +293,7 @@ SciMLBase.isinplace(::WOperator{IIP}, i) where {IIP} = IIP Base.eltype(W::WOperator) = eltype(W.J) # In WOperator update_coefficients!, accept both missing u/p/t and missing dtgamma/transform and don't update them in that case. -# This helps support partial updating logic used with Newton solvers. +# This helps support partial updating logic used with Newton solvers. function SciMLOperators.update_coefficients!(W::WOperator, u = nothing, p = nothing, @@ -483,8 +482,8 @@ end end function jacobian2W!( - W::AbstractMatrix, mass_matrix::MT, dtgamma::Number, J::AbstractMatrix, - W_transform::Bool)::Nothing where {MT} + W::AbstractMatrix, mass_matrix, dtgamma::Number, J::AbstractMatrix, + W_transform::Bool)::Nothing # check size and dimension iijj = axes(W) @boundscheck (iijj == axes(J) && length(iijj) == 2) || _throwWJerror(W, J) @@ -492,7 +491,7 @@ function jacobian2W!( @boundscheck axes(mass_matrix) == axes(W) || _throwWMerror(W, mass_matrix) @inbounds if W_transform invdtgamma = inv(dtgamma) - if MT <: UniformScaling + if mass_matrix isa UniformScaling copyto!(W, J) idxs = diagind(W) λ = -mass_matrix.λ @@ -508,28 +507,9 @@ function jacobian2W!( @.. broadcast=false W=muladd(-mass_matrix, invdtgamma, J) end else - if MT <: UniformScaling + if mass_matrix isa UniformScaling λ = -mass_matrix.λ - if W isa AbstractSparseMatrix && !(W isa SparseMatrixCSC) - # This is specifically to catch the GPU sparse matrix cases - # Which do not support diagonal indexing - # https://github.com/JuliaGPU/CUDA.jl/issues/1395 - - Wn = nonzeros(W) - Jn = nonzeros(J) - - # I would hope to check this generically, but `CuSparseMatrixCSC` has `colPtr` - # and `rowVal` while SparseMatrixCSC is colptr and rowval, and there is no - # standard for checking sparsity patterns in general. So for now, write it for - # the convention of CUDA.jl and handle the case of some other convention when - # it comes up. - - @assert J.colPtr == W.colPtr - @assert J.rowVal == W.rowVal - - @.. broadcast=false Wn=dtgamma * Jn - W .= W + λ * I - elseif W isa SparseMatrixCSC + if W isa SparseMatrixCSC #= using LinearAlgebra, SparseArrays, FastBroadcast J = sparse(Diagonal(ones(4))) @@ -553,6 +533,25 @@ function jacobian2W!( @.. broadcast=false W.nzval=dtgamma * J.nzval idxs = diagind(W) @.. broadcast=false @view(W[idxs])=@view(W[idxs]) + λ + elseif W isa AbstractSparseMatrix + # This is specifically to catch the GPU sparse matrix cases + # Which do not support diagonal indexing + # https://github.com/JuliaGPU/CUDA.jl/issues/1395 + + Wn = nonzeros(W) + Jn = nonzeros(J) + + # I would hope to check this generically, but `CuSparseMatrixCSC` has `colPtr` + # and `rowVal` while SparseMatrixCSC is colptr and rowval, and there is no + # standard for checking sparsity patterns in general. So for now, write it for + # the convention of CUDA.jl and handle the case of some other convention when + # it comes up. + + @assert J.colPtr == W.colPtr + @assert J.rowVal == W.rowVal + + @.. broadcast=false Wn=dtgamma * Jn + W .= W + λ * I else # Anything not a sparse matrix @.. broadcast=false W=dtgamma * J idxs = diagind(W) @@ -565,8 +564,8 @@ function jacobian2W!( return nothing end -function jacobian2W!(W::Matrix, mass_matrix::MT, dtgamma::Number, J::Matrix, - W_transform::Bool)::Nothing where {MT} +function jacobian2W!(W::Matrix, mass_matrix, dtgamma::Number, J::Matrix, + W_transform::Bool)::Nothing # check size and dimension iijj = axes(W) @boundscheck (iijj == axes(J) && length(iijj) == 2) || _throwWJerror(W, J) @@ -574,7 +573,7 @@ function jacobian2W!(W::Matrix, mass_matrix::MT, dtgamma::Number, J::Matrix, @boundscheck axes(mass_matrix) == axes(W) || _throwWMerror(W, mass_matrix) @inbounds if W_transform invdtgamma = inv(dtgamma) - if MT <: UniformScaling + if mass_matrix isa UniformScaling copyto!(W, J) idxs = diagind(W) λ = -mass_matrix.λ @@ -587,7 +586,7 @@ function jacobian2W!(W::Matrix, mass_matrix::MT, dtgamma::Number, J::Matrix, end end else - if MT <: UniformScaling + if mass_matrix isa UniformScaling idxs = diagind(W) @inbounds @simd ivdep for i in eachindex(W) W[i] = dtgamma * J[i] diff --git a/src/perform_step/explicit_rk_perform_step.jl b/src/perform_step/explicit_rk_perform_step.jl index 03e937a43d..0027e2563e 100644 --- a/src/perform_step/explicit_rk_perform_step.jl +++ b/src/perform_step/explicit_rk_perform_step.jl @@ -48,10 +48,9 @@ end u = uprev + dt * accum if integrator.alg isa CompositeAlgorithm - # Hairer II, page 22 - ϱu = integrator.opts.internalnorm(kk[end] - kk[end - 1], t) - ϱd = integrator.opts.internalnorm(u - u_beforefinal, t) - integrator.eigen_est = ϱu / ϱd + # Hairer II, page 22 modified to use Inf norm + n = maximum(abs.((kk[end] .- kk[end - 1]) / (u .- u_beforefinal))) + integrator.eigen_est = integrator.opts.internalnorm(n, t) end if integrator.opts.adaptive @@ -221,12 +220,9 @@ end end if integrator.alg isa CompositeAlgorithm - # Hairer II, page 22 - @.. broadcast=false utilde=kk[end] - kk[end - 1] - ϱu = integrator.opts.internalnorm(utilde, t) - @.. broadcast=false utilde=u - tmp - ϱd = integrator.opts.internalnorm(utilde, t) - integrator.eigen_est = ϱu / ϱd + # Hairer II, page 22 modified to use Inf norm + @.. broadcast=false utilde=abs((kk[end] - kk[end - 1]) / (u - tmp)) + integrator.eigen_est = integrator.opts.internalnorm(maximum(utilde), t) end if integrator.opts.adaptive diff --git a/src/perform_step/low_order_rk_perform_step.jl b/src/perform_step/low_order_rk_perform_step.jl index 0f842f1b88..bf573d16ad 100644 --- a/src/perform_step/low_order_rk_perform_step.jl +++ b/src/perform_step/low_order_rk_perform_step.jl @@ -761,9 +761,8 @@ end integrator.stats.nf += 6 if integrator.alg isa CompositeAlgorithm g7 = u - # Hairer II, page 22 - integrator.eigen_est = integrator.opts.internalnorm(k7 - k6, t) / - integrator.opts.internalnorm(g7 - g6, t) + # Hairer II, page 22 modified to use the Inf norm + integrator.eigen_est = integrator.opts.internalnorm(maximum(abs.(k7 .- k6) ./ (g7 .- g6)), t) end if integrator.opts.adaptive utilde = dt * @@ -836,12 +835,9 @@ end if integrator.alg isa CompositeAlgorithm g7 = u g6 = tmp - # Hairer II, page 22 - @.. broadcast=false thread=thread utilde=k7 - k6 - ϱu = integrator.opts.internalnorm(utilde, t) - @.. broadcast=false thread=thread utilde=g7 - g6 - ϱd = integrator.opts.internalnorm(utilde, t) - integrator.eigen_est = ϱu / ϱd + # Hairer II, page 22 modified to use Inf norm + @.. broadcast=false thread=thread utilde=abs((k7 - k6) / (g7 - g6)) + integrator.eigen_est = integrator.opts.internalnorm(maximum(utilde), t) end if integrator.opts.adaptive @.. broadcast=false thread=thread utilde=dt * (btilde1 * k1 + btilde2 * k2 + @@ -889,9 +885,8 @@ end integrator.stats.nf += 6 if integrator.alg isa CompositeAlgorithm g7 = u - # Hairer II, page 22 - integrator.eigen_est = integrator.opts.internalnorm(k7 - k6, t) / - integrator.opts.internalnorm(g7 - g6, t) + # Hairer II, page 22 modified to use the Inf norm + integrator.eigen_est = integrator.opts.internalnorm(maximum(abs.(k7 .- k6) ./ (g7 .- g6)), t) end if integrator.opts.adaptive utilde = dt * @@ -957,12 +952,9 @@ end if integrator.alg isa CompositeAlgorithm g6 = tmp g7 = u - # Hairer II, page 22 - @.. broadcast=false thread=thread g6=g7 - g6 - ϱd = integrator.opts.internalnorm(g6, t) - @.. broadcast=false thread=thread tmp=k7 - k6 - ϱu = integrator.opts.internalnorm(tmp, t) - integrator.eigen_est = (ϱu / ϱd) * oneunit(t) + # Hairer II, page 22 modified to use Inf norm + @.. broadcast=false thread=thread utilde=abs((k7 - k6) / (g7 - g6)) + integrator.eigen_est = integrator.opts.internalnorm(maximum(utilde) * oneunit(t), t) end if integrator.opts.adaptive @.. broadcast=false thread=thread utilde=dt * (btilde1 * k1 + btilde3 * k3 + @@ -987,71 +979,6 @@ end return nothing end -#= -@muladd function perform_step!(integrator, cache::DP5Cache, repeat_step=false) - @unpack t,dt,uprev,u,f,p = integrator - uidx = eachindex(integrator.uprev) - @unpack a21,a31,a32,a41,a42,a43,a51,a52,a53,a54,a61,a62,a63,a64,a65,a71,a73,a74,a75,a76,btilde1,btilde3,btilde4,btilde5,btilde6,btilde7,c1,c2,c3,c4,c5,c6 = cache.tab - @unpack k1,k2,k3,k4,k5,k6,k7,dense_tmp3,dense_tmp4,update,bspl,utilde,tmp,atmp = cache - @unpack d1,d3,d4,d5,d6,d7 = cache.tab - a = dt*a21 - @tight_loop_macros for i in uidx - @inbounds tmp[i] = uprev[i]+a*k1[i] - end - f(k2, tmp, p, t+c1*dt) - @tight_loop_macros for i in uidx - @inbounds tmp[i] = uprev[i]+dt*(a31*k1[i]+a32*k2[i]) - end - f(k3, tmp, p, t+c2*dt) - @tight_loop_macros for i in uidx - @inbounds tmp[i] = uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) - end - f(k4, tmp, p, t+c3*dt) - @tight_loop_macros for i in uidx - @inbounds tmp[i] = uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) - end - f(k5, tmp, p, t+c4*dt) - @tight_loop_macros for i in uidx - @inbounds tmp[i] = uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) - end - f(k6, tmp, p, t+dt) - @tight_loop_macros for i in uidx - @inbounds update[i] = a71*k1[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i] - @inbounds u[i] = uprev[i]+dt*update[i] - end - f(k7, u, p, t+dt) - integrator.stats.nf += 6 - if integrator.alg isa CompositeAlgorithm - g6 = tmp - g7 = u - # Hairer II, page 22 - ϱu, ϱd = zero(eltype(k7))^2, zero(eltype(g7))^2 - @inbounds for i in eachindex(k7) - ϱu += (k7[i] - k6[i])^2 - ϱd += (g7[i] - g6[i])^2 - end - integrator.eigen_est = sqrt(ϱu/ϱd)*oneunit(t) - end - if integrator.opts.adaptive - @tight_loop_macros for i in uidx - @inbounds utilde[i] = dt*(btilde1*k1[i] + btilde3*k3[i] + btilde4*k4[i] + btilde5*k5[i] + btilde6*k6[i] + btilde7*k7[i]) - end - calculate_residuals!(atmp, utilde, uprev, u, integrator.opts.abstol, integrator.opts.reltol,integrator.opts.internalnorm,t) - integrator.EEst = integrator.opts.internalnorm(atmp,t) - end - if integrator.opts.calck - @tight_loop_macros for i in uidx - #integrator.k[4] == k5 - @inbounds integrator.k[4][i] = d1*k1[i]+d3*k3[i]+d4*k4[i]+d5*k5[i]+d6*k6[i]+d7*k7[i] - #bspl == k3 - @inbounds bspl[i] = k1[i] - update[i] - # k6 === integrator.k[3] === k2 - @inbounds integrator.k[3][i] = update[i] - k7[i] - bspl[i] - end - end -end -=# - function initialize!(integrator, cache::KYK2014DGSSPRK_3S2_ConstantCache) integrator.fsalfirst = integrator.f(integrator.uprev, integrator.p, integrator.t) # Pre-start fsal integrator.stats.nf += 1 diff --git a/src/perform_step/verner_rk_perform_step.jl b/src/perform_step/verner_rk_perform_step.jl index 7c76e26719..41806f0922 100644 --- a/src/perform_step/verner_rk_perform_step.jl +++ b/src/perform_step/verner_rk_perform_step.jl @@ -40,9 +40,7 @@ end integrator.stats.nf += 8 if integrator.alg isa CompositeAlgorithm g9 = u - ϱu = integrator.opts.internalnorm(k9 - k8, t) - ϱd = integrator.opts.internalnorm(g9 - g8, t) - integrator.eigen_est = ϱu / ϱd + integrator.eigen_est = integrator.opts.internalnorm(maximum(abs.((k9 .- k8) ./ (g9 .- g8))), t) end if integrator.opts.adaptive utilde = dt * @@ -163,11 +161,8 @@ end if integrator.alg isa CompositeAlgorithm g9 = u g8 = tmp - @.. broadcast=false thread=thread rtmp=k9 - k8 - ϱu = integrator.opts.internalnorm(rtmp, t) - @.. broadcast=false thread=thread utilde=g9 - g8 - ϱd = integrator.opts.internalnorm(utilde, t) - integrator.eigen_est = ϱu / ϱd + @.. broadcast=false thread=thread rtmp=abs((k9 - k8) / (g9 - g8)) + integrator.eigen_est = integrator.opts.internalnorm(maximum(rtmp), t) end if integrator.opts.adaptive @.. broadcast=false thread=thread utilde=dt * (btilde1 * k1 + btilde4 * k4 + @@ -252,9 +247,7 @@ end integrator.stats.nf += 10 u = uprev + dt * (b1 * k1 + b4 * k4 + b5 * k5 + b6 * k6 + b7 * k7 + b8 * k8 + b9 * k9) if integrator.alg isa CompositeAlgorithm - ϱu = integrator.opts.internalnorm(k10 - k9, t) - ϱd = integrator.opts.internalnorm(g10 - g9, t) - integrator.eigen_est = ϱu / ϱd + integrator.eigen_est = integrator.opts.internalnorm(maximum(abs.((k10 .- k9) ./ (g10 .- g9))), t) end if integrator.opts.adaptive utilde = dt * @@ -415,11 +408,8 @@ end if integrator.alg isa CompositeAlgorithm g10 = u g9 = tmp - @.. broadcast=false thread=thread rtmp=k10 - k9 - ϱu = integrator.opts.internalnorm(rtmp, t) - @.. broadcast=false thread=thread utilde=g10 - g9 - ϱd = integrator.opts.internalnorm(utilde, t) - integrator.eigen_est = ϱu / ϱd + @.. broadcast=false thread=thread rtmp=abs((k10 - k9) / (g10 - g9)) + integrator.eigen_est = integrator.opts.internalnorm(maximum(rtmp), t) end if integrator.opts.adaptive @.. broadcast=false thread=thread utilde=dt * (btilde1 * k1 + btilde4 * k4 + @@ -547,9 +537,7 @@ end dt * (b1 * k1 + b6 * k6 + b7 * k7 + b8 * k8 + b9 * k9 + b10 * k10 + b11 * k11 + b12 * k12) if integrator.alg isa CompositeAlgorithm - ϱu = integrator.opts.internalnorm(k13 - k12, t) - ϱd = integrator.opts.internalnorm(g13 - g12, t) - integrator.eigen_est = ϱu / ϱd + integrator.eigen_est = integrator.opts.internalnorm(maximum(abs.((k13 .- k12) ./ (g13 .- g12))), t) end if integrator.opts.adaptive utilde = dt * @@ -739,11 +727,8 @@ end if integrator.alg isa CompositeAlgorithm g13 = u g12 = tmp - @.. broadcast=false thread=thread rtmp=k13 - k12 - ϱu = integrator.opts.internalnorm(rtmp, t) - @.. broadcast=false thread=thread utilde=g13 - g12 - ϱd = integrator.opts.internalnorm(utilde, t) - integrator.eigen_est = ϱu / ϱd + @.. broadcast=false thread=thread rtmp = abs((k13 - k12) / (g13 - g12)) + integrator.eigen_est = integrator.opts.internalnorm(maximum(rtmp), t) end @.. broadcast=false thread=thread u=uprev + dt * @@ -918,9 +903,7 @@ end dt * (b1 * k1 + b8 * k8 + b9 * k9 + b10 * k10 + b11 * k11 + b12 * k12 + b13 * k13 + b14 * k14 + b15 * k15) if integrator.alg isa CompositeAlgorithm - ϱu = integrator.opts.internalnorm(k16 - k15, t) - ϱd = integrator.opts.internalnorm(g16 - g15, t) - integrator.eigen_est = ϱu / ϱd + integrator.eigen_est = integrator.opts.internalnorm(maximum(abs.((k16 .- k15) ./ (g16 .- g15))), t) end if integrator.opts.adaptive utilde = dt * (btilde1 * k1 + btilde8 * k8 + btilde9 * k9 + btilde10 * k10 + @@ -1148,11 +1131,8 @@ end if integrator.alg isa CompositeAlgorithm g16 = u g15 = tmp - @.. broadcast=false thread=thread rtmp=k16 - k15 - ϱu = integrator.opts.internalnorm(rtmp, t) - @.. broadcast=false thread=thread utilde=g16 - g15 - ϱd = integrator.opts.internalnorm(utilde, t) - integrator.eigen_est = ϱu / ϱd + @.. broadcast=false thread=thread rtmp=abs((k16 - k15) / (g16 - g15)) + integrator.eigen_est = integrator.opts.internalnorm(maximum(rtmp), t) end @.. broadcast=false thread=thread u=uprev + dt * diff --git a/test/interface/default_solver_tests.jl b/test/interface/default_solver_tests.jl index b409ab087c..07aff4a3d2 100644 --- a/test/interface/default_solver_tests.jl +++ b/test/interface/default_solver_tests.jl @@ -54,13 +54,12 @@ rosensol = solve( function exrober(du, u, p, t) y₁, y₂, y₃ = u k₁, k₂, k₃ = p - du .= vcat( - [-k₁ * y₁ + k₃ * y₂ * y₃, - k₁ * y₁ - k₃ * y₂ * y₃ - k₂ * y₂^2, - k₂ * y₂^2], u[4:end]) + du .= vcat([-k₁ * y₁ + k₃ * y₂ * y₃, + k₁ * y₁ - k₃ * y₂ * y₃ - k₂ * y₂^2, + k₂ * y₂^2, ], fill(t, length(u)-3)) end -for n in (100,) # 600 should be added but currently is broken for unknown reasons +for n in (100, 600) stiffalg = n < 50 ? 4 : n < 500 ? 5 : 6 linsolve = n < 500 ? nothing : KrylovJL_GMRES() jac_prototype = sparse(I(n + 3))