diff --git a/.buildkite/runtests.yml b/.buildkite/runtests.yml index 11e650fa..7a7ff5ce 100644 --- a/.buildkite/runtests.yml +++ b/.buildkite/runtests.yml @@ -4,6 +4,7 @@ steps: setup: version: - "1" + - "1.9" env: GROUP: CUDA plugins: @@ -31,7 +32,7 @@ steps: setup: version: # Known issues on 1.8 - - "1.9-nightly" + - "1" env: GROUP: AMDGPU plugins: @@ -59,7 +60,7 @@ steps: setup: version: - "1" - - "1.9-nightly" + - "1.9" env: GROUP: oneAPI plugins: @@ -69,8 +70,11 @@ steps: - | julia --project=test -e ' import Pkg - Pkg.add(; name = "oneAPI")' - rm test/Manifest.toml + Pkg.add(; name="oneAPI") + + println("+++ :julia: Building support library") + include(joinpath(dirname(dirname(Base.find_package("oneAPI"))), "deps", "build_ci.jl")) + Pkg.activate()' julia --project -e ' import Pkg println("+++ :julia: Running tests") @@ -86,8 +90,7 @@ steps: matrix: setup: version: - - "1" - - "1.9-nightly" + - "1.9" env: GROUP: Metal plugins: diff --git a/Project.toml b/Project.toml index 3158da47..e872f45b 100644 --- a/Project.toml +++ b/Project.toml @@ -38,7 +38,7 @@ MetalExt = ["Metal"] oneAPIExt = ["oneAPI"] [compat] -AMDGPU = "0.4.9" +AMDGPU = "0.5, 0.6, 0.7, 0.8" Adapt = "3, 4" CUDA = "4.1.0, 5" ChainRulesCore = "1" diff --git a/ext/AMDGPUExt.jl b/ext/AMDGPUExt.jl index 096f5a39..01fa2ba0 100644 --- a/ext/AMDGPUExt.jl +++ b/ext/AMDGPUExt.jl @@ -5,6 +5,9 @@ import DiffEqGPU using .AMDGPU import .AMDGPU: ROCBackend +function DiffEqGPU.EnsembleGPUArray(cpu_offload::Float64) + DiffEqGPU.EnsembleGPUArray(ROCBackend(), cpu_offload) +end DiffEqGPU.maxthreads(::ROCBackend) = 256 DiffEqGPU.maybe_prefer_blocks(::ROCBackend) = ROCBackend() diff --git a/src/ensemblegpukernel/callbacks.jl b/src/ensemblegpukernel/callbacks.jl index d95c251a..aa537a9c 100644 --- a/src/ensemblegpukernel/callbacks.jl +++ b/src/ensemblegpukernel/callbacks.jl @@ -47,7 +47,7 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <: reltol::T2, repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2, T3, I, R, - } + } if save_positions != (false, false) error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.") end diff --git a/src/ensemblegpukernel/integrators/integrator_utils.jl b/src/ensemblegpukernel/integrators/integrator_utils.jl index f2d4f7da..bad07883 100644 --- a/src/ensemblegpukernel/integrators/integrator_utils.jl +++ b/src/ensemblegpukernel/integrators/integrator_utils.jl @@ -54,7 +54,7 @@ end IIP, S, T, - } +} integrator.retcode = retcode end @@ -156,7 +156,7 @@ end S, T, T1, - } +} # Can get rid of an allocation here with a function # get_tmp_arr(integrator.cache) which gives a pointer to some # cache array which can be modified. @@ -185,7 +185,7 @@ end S, T, T1, - } +} _change_t_via_interpolation!(integrator, t, modify_save_endpoint) end @@ -322,7 +322,7 @@ end IIP, S, T, - } +} return nothing end @@ -333,7 +333,7 @@ end }, callback, abst) where {AlgType <: GPUODEAlgorithm, IIP, S, T - } +} if abst == integrator.t tmp = integrator.u elseif abst == integrator.tprev diff --git a/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl b/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl index 134ea1aa..7163ec4b 100644 --- a/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl +++ b/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl @@ -8,7 +8,7 @@ IIP, S, T, - } +} y₁ = integ.u k1 = integ.k1 k2 = integ.k2 diff --git a/src/ensemblegpukernel/integrators/stiff/types.jl b/src/ensemblegpukernel/integrators/stiff/types.jl index f61e4a14..021f8bf1 100644 --- a/src/ensemblegpukernel/integrators/stiff/types.jl +++ b/src/ensemblegpukernel/integrators/stiff/types.jl @@ -3,13 +3,13 @@ IIP, S, T, - })(t) where { +})(t) where { AlgType <: GPUODEAlgorithm, IIP, S, T, - } +} Θ = (t - integrator.tprev) / integrator.dt _ode_interpolant(Θ, integrator.dt, integrator.uprev, integrator) end diff --git a/test/distributed_multi_gpu.jl b/test/distributed_multi_gpu.jl index 17df474f..f6ee675d 100644 --- a/test/distributed_multi_gpu.jl +++ b/test/distributed_multi_gpu.jl @@ -9,7 +9,6 @@ addprocs(2) du[2] = u[1] * (p[2] - u[3]) - u[2] du[3] = u[1] * u[2] - p[3] * u[3] end - CUDA.allowscalar(false) u0 = Float32[1.0; 0.0; 0.0] tspan = (0.0f0, 100.0f0) p = (10.0f0, 28.0f0, 8 / 3.0f0) diff --git a/test/ensemblegpuarray.jl b/test/ensemblegpuarray.jl index 0979e476..8b26ac7a 100644 --- a/test/ensemblegpuarray.jl +++ b/test/ensemblegpuarray.jl @@ -74,15 +74,18 @@ monteprob_jac = EnsembleProblem(prob_jac, prob_func = prob_func) @time solve(monteprob_jac, Rodas5(), EnsembleCPUArray(), dt = 0.1, trajectories = 10, saveat = 1.0f0) -@time solve(monteprob_jac, Rodas5(), EnsembleGPUArray(backend), dt = 0.1, - trajectories = 10, - saveat = 1.0f0) @time solve(monteprob_jac, TRBDF2(), EnsembleCPUArray(), dt = 0.1, trajectories = 10, saveat = 1.0f0) -@time solve(monteprob_jac, TRBDF2(), EnsembleGPUArray(backend), dt = 0.1, - trajectories = 10, - saveat = 1.0f0) + +if GROUP == "CUDA" + @time solve(monteprob_jac, Rodas5(), EnsembleGPUArray(backend), dt = 0.1, + trajectories = 10, + saveat = 1.0f0) + @time solve(monteprob_jac, TRBDF2(), EnsembleGPUArray(backend), dt = 0.1, + trajectories = 10, + saveat = 1.0f0) +end @info "Callbacks" @@ -183,18 +186,19 @@ sol = solve(rober_prob, Rodas5(), abstol = 1.0f-8, reltol = 1.0f-8) sol = solve(rober_prob, TRBDF2(), abstol = 1.0f-4, reltol = 1.0f-1) rober_monteprob = EnsembleProblem(rober_prob, prob_func = prob_func) -# TODO: Does not work with Linearsolve.jl v1.35.0 https://github.com/SciML/DiffEqGPU.jl/pull/229 +if GROUP == "CUDA" + @time sol = solve(rober_monteprob, Rodas5(), + EnsembleGPUArray(backend), trajectories = 10, + saveat = 1.0f0, + abstol = 1.0f-8, + reltol = 1.0f-8) + @time sol = solve(rober_monteprob, TRBDF2(), + EnsembleGPUArray(backend), trajectories = 10, + saveat = 1.0f0, + abstol = 1.0f-4, + reltol = 1.0f-1) +end -@time sol = solve(rober_monteprob, Rodas5(), - EnsembleGPUArray(backend), trajectories = 10, - saveat = 1.0f0, - abstol = 1.0f-8, - reltol = 1.0f-8) -@time sol = solve(rober_monteprob, TRBDF2(), - EnsembleGPUArray(backend), trajectories = 10, - saveat = 1.0f0, - abstol = 1.0f-4, - reltol = 1.0f-1) @time sol = solve(rober_monteprob, TRBDF2(), EnsembleThreads(), trajectories = 10, abstol = 1e-4, reltol = 1e-1, saveat = 1.0f0) @@ -241,5 +245,8 @@ monteprob = EnsembleProblem(prob_jac, sol = solve(monteprob, Tsit5(), EnsembleGPUArray(backend, 0.0), trajectories = 10, adaptive = false, dt = 0.01f0, save_everystep = false) -sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend, 0.0), trajectories = 10, - adaptive = false, dt = 0.01f0, save_everystep = false) +if GROUP == "CUDA" + sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend, 0.0), + trajectories = 10, + adaptive = false, dt = 0.01f0, save_everystep = false) +end diff --git a/test/ensemblegpuarray_oop.jl b/test/ensemblegpuarray_oop.jl index f7c8ca17..534ba283 100644 --- a/test/ensemblegpuarray_oop.jl +++ b/test/ensemblegpuarray_oop.jl @@ -34,9 +34,12 @@ prob_func = (prob, i, repeat) -> remake(prob, p = rand(Float32, 3) .* p) monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false) @time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(backend), trajectories = 10_000, saveat = 1.0f0) -@time sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend), - trajectories = 10_000, - saveat = 1.0f0) -@time sol = solve(monteprob, TRBDF2(), EnsembleGPUArray(backend), - trajectories = 10_000, - saveat = 1.0f0) + +if GROUP == "CUDA" + @time sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend), + trajectories = 10_000, + saveat = 1.0f0) + @time sol = solve(monteprob, TRBDF2(), EnsembleGPUArray(backend), + trajectories = 10_000, + saveat = 1.0f0) +end diff --git a/test/lower_level_api.jl b/test/lower_level_api.jl index d472b470..2400cc3f 100644 --- a/test/lower_level_api.jl +++ b/test/lower_level_api.jl @@ -1,4 +1,4 @@ -using DiffEqGPU, StaticArrays, CUDA, Adapt, OrdinaryDiffEq +using DiffEqGPU, StaticArrays, Adapt, OrdinaryDiffEq include("utils.jl") diff --git a/test/runtests.jl b/test/runtests.jl index 175c36ea..95afa694 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -79,7 +79,8 @@ if GROUP in SUPPORTS_DOUBLE_PRECISION end end -if GROUP == "CUDA" +# Callbacks currently error on v1.10 +if GROUP == "CUDA" && VERSION <= v"1.9" # Causes dynamic function invocation @time @testset "GPU Kernelized Non Stiff ODE ContinuousCallback" begin include("gpu_kernel_de/gpu_ode_continuous_callbacks.jl")