From c528f760fb5782a4bf0eb21d043fa8bb01e24503 Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Wed, 10 Jan 2024 08:18:07 -0700 Subject: [PATCH 1/7] Updates for HIP-based AMDGPU --- Project.toml | 2 +- ext/AMDGPUExt.jl | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3158da47..e872f45b 100644 --- a/Project.toml +++ b/Project.toml @@ -38,7 +38,7 @@ MetalExt = ["Metal"] oneAPIExt = ["oneAPI"] [compat] -AMDGPU = "0.4.9" +AMDGPU = "0.5, 0.6, 0.7, 0.8" Adapt = "3, 4" CUDA = "4.1.0, 5" ChainRulesCore = "1" diff --git a/ext/AMDGPUExt.jl b/ext/AMDGPUExt.jl index 096f5a39..01fa2ba0 100644 --- a/ext/AMDGPUExt.jl +++ b/ext/AMDGPUExt.jl @@ -5,6 +5,9 @@ import DiffEqGPU using .AMDGPU import .AMDGPU: ROCBackend +function DiffEqGPU.EnsembleGPUArray(cpu_offload::Float64) + DiffEqGPU.EnsembleGPUArray(ROCBackend(), cpu_offload) +end DiffEqGPU.maxthreads(::ROCBackend) = 256 DiffEqGPU.maybe_prefer_blocks(::ROCBackend) = ROCBackend() From 436fb7d2981b19c2307c07be68fe29ebc0ab2f92 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Wed, 10 Jan 2024 14:34:40 -0500 Subject: [PATCH 2/7] Test AMDGPU on Julia v1 --- .buildkite/runtests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/runtests.yml b/.buildkite/runtests.yml index 11e650fa..47456437 100644 --- a/.buildkite/runtests.yml +++ b/.buildkite/runtests.yml @@ -31,7 +31,7 @@ steps: setup: version: # Known issues on 1.8 - - "1.9-nightly" + - "1" env: GROUP: AMDGPU plugins: From 5ccc9165e09518ebecb0b8b68dbc73abff1b14f7 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Mon, 15 Jan 2024 23:28:48 -0500 Subject: [PATCH 3/7] Update CI versions --- .buildkite/runtests.yml | 13 ++++++++----- src/ensemblegpukernel/callbacks.jl | 2 +- .../integrators/integrator_utils.jl | 10 +++++----- .../integrators/nonstiff/interpolants.jl | 2 +- src/ensemblegpukernel/integrators/stiff/types.jl | 4 ++-- test/ensemblegpuarray.jl | 2 ++ test/ensemblegpuarray_oop.jl | 3 +++ 
test/runtests.jl | 3 ++- 8 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.buildkite/runtests.yml b/.buildkite/runtests.yml index 47456437..4a25c3a8 100644 --- a/.buildkite/runtests.yml +++ b/.buildkite/runtests.yml @@ -4,6 +4,7 @@ steps: setup: version: - "1" + - "1.9" env: GROUP: CUDA plugins: @@ -59,7 +60,7 @@ steps: setup: version: - "1" - - "1.9-nightly" + - "1.9" env: GROUP: oneAPI plugins: @@ -69,8 +70,11 @@ steps: - | julia --project=test -e ' import Pkg - Pkg.add(; name = "oneAPI")' - rm test/Manifest.toml + Pkg.develop(; name="oneAPI") + + println("+++ :julia: Building support library") + include(joinpath(Pkg.devdir(), "oneAPI", "deps", "build_ci.jl")) + Pkg.activate()' julia --project -e ' import Pkg println("+++ :julia: Running tests") @@ -86,8 +90,7 @@ steps: matrix: setup: version: - - "1" - - "1.9-nightly" + - "1.9" env: GROUP: Metal plugins: diff --git a/src/ensemblegpukernel/callbacks.jl b/src/ensemblegpukernel/callbacks.jl index d95c251a..aa537a9c 100644 --- a/src/ensemblegpukernel/callbacks.jl +++ b/src/ensemblegpukernel/callbacks.jl @@ -47,7 +47,7 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <: reltol::T2, repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2, T3, I, R, - } + } if save_positions != (false, false) error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. 
Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.") end diff --git a/src/ensemblegpukernel/integrators/integrator_utils.jl b/src/ensemblegpukernel/integrators/integrator_utils.jl index f2d4f7da..bad07883 100644 --- a/src/ensemblegpukernel/integrators/integrator_utils.jl +++ b/src/ensemblegpukernel/integrators/integrator_utils.jl @@ -54,7 +54,7 @@ end IIP, S, T, - } +} integrator.retcode = retcode end @@ -156,7 +156,7 @@ end S, T, T1, - } +} # Can get rid of an allocation here with a function # get_tmp_arr(integrator.cache) which gives a pointer to some # cache array which can be modified. @@ -185,7 +185,7 @@ end S, T, T1, - } +} _change_t_via_interpolation!(integrator, t, modify_save_endpoint) end @@ -322,7 +322,7 @@ end IIP, S, T, - } +} return nothing end @@ -333,7 +333,7 @@ end }, callback, abst) where {AlgType <: GPUODEAlgorithm, IIP, S, T - } +} if abst == integrator.t tmp = integrator.u elseif abst == integrator.tprev diff --git a/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl b/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl index 134ea1aa..7163ec4b 100644 --- a/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl +++ b/src/ensemblegpukernel/integrators/nonstiff/interpolants.jl @@ -8,7 +8,7 @@ IIP, S, T, - } +} y₁ = integ.u k1 = integ.k1 k2 = integ.k2 diff --git a/src/ensemblegpukernel/integrators/stiff/types.jl b/src/ensemblegpukernel/integrators/stiff/types.jl index f61e4a14..021f8bf1 100644 --- a/src/ensemblegpukernel/integrators/stiff/types.jl +++ b/src/ensemblegpukernel/integrators/stiff/types.jl @@ -3,13 +3,13 @@ IIP, S, T, - })(t) where { +})(t) where { AlgType <: GPUODEAlgorithm, IIP, S, T, - } +} Θ = (t - integrator.tprev) / integrator.dt _ode_interpolant(Θ, integrator.dt, integrator.uprev, integrator) end diff --git a/test/ensemblegpuarray.jl b/test/ensemblegpuarray.jl index 0979e476..53db34f9 100644 --- a/test/ensemblegpuarray.jl +++ b/test/ensemblegpuarray.jl @@ 
-43,6 +43,8 @@ solve(monteprob,TRBDF2(),EnsembleGPUArray(backend),dt=0.1,trajectories=2,saveat= @test_broken solve(monteprob,TRBDF2(),EnsembleGPUArray(backend),dt=0.1,trajectories=2,saveat=1.0f0) =# +GROUP == "AMDGPU" && return + @info "Implicit Methods" function lorenz_jac(J, u, p, t) diff --git a/test/ensemblegpuarray_oop.jl b/test/ensemblegpuarray_oop.jl index f7c8ca17..37a0660e 100644 --- a/test/ensemblegpuarray_oop.jl +++ b/test/ensemblegpuarray_oop.jl @@ -34,6 +34,9 @@ prob_func = (prob, i, repeat) -> remake(prob, p = rand(Float32, 3) .* p) monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false) @time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(backend), trajectories = 10_000, saveat = 1.0f0) + +GROUP == "AMDGPU" && return + @time sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend), trajectories = 10_000, saveat = 1.0f0) diff --git a/test/runtests.jl b/test/runtests.jl index 175c36ea..95afa694 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -79,7 +79,8 @@ if GROUP in SUPPORTS_DOUBLE_PRECISION end end -if GROUP == "CUDA" +# Callbacks currently error on v1.10 +if GROUP == "CUDA" && VERSION < v"1.10-" # Causes dynamic function invocation @time @testset "GPU Kernelized Non Stiff ODE ContinuousCallback" begin include("gpu_kernel_de/gpu_ode_continuous_callbacks.jl") From 09085d7e939883cf14932e5f08e49c69606d19b6 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Mon, 15 Jan 2024 23:35:15 -0500 Subject: [PATCH 4/7] Try adding oneAPI instead of dev --- .buildkite/runtests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/runtests.yml b/.buildkite/runtests.yml index 4a25c3a8..7a7ff5ce 100644 --- a/.buildkite/runtests.yml +++ b/.buildkite/runtests.yml @@ -70,10 +70,10 @@ steps: - | julia --project=test -e ' import Pkg - Pkg.develop(; name="oneAPI") + Pkg.add(; name="oneAPI") println("+++ :julia: Building support library") - include(joinpath(Pkg.devdir(), "oneAPI", "deps", "build_ci.jl")) + 
include(joinpath(dirname(dirname(Base.find_package("oneAPI"))), "deps", "build_ci.jl")) Pkg.activate()' julia --project -e ' import Pkg From 3d1e57a9875bbd2f437d60f893ec24d3238752b1 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Mon, 15 Jan 2024 23:57:28 -0500 Subject: [PATCH 5/7] Separate implicit methods for CUDA only --- test/ensemblegpuarray.jl | 47 ++++++++++++++++++++---------------- test/ensemblegpuarray_oop.jl | 16 ++++++------ 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/test/ensemblegpuarray.jl b/test/ensemblegpuarray.jl index 53db34f9..8b26ac7a 100644 --- a/test/ensemblegpuarray.jl +++ b/test/ensemblegpuarray.jl @@ -43,8 +43,6 @@ solve(monteprob,TRBDF2(),EnsembleGPUArray(backend),dt=0.1,trajectories=2,saveat= @test_broken solve(monteprob,TRBDF2(),EnsembleGPUArray(backend),dt=0.1,trajectories=2,saveat=1.0f0) =# -GROUP == "AMDGPU" && return - @info "Implicit Methods" function lorenz_jac(J, u, p, t) @@ -76,15 +74,18 @@ monteprob_jac = EnsembleProblem(prob_jac, prob_func = prob_func) @time solve(monteprob_jac, Rodas5(), EnsembleCPUArray(), dt = 0.1, trajectories = 10, saveat = 1.0f0) -@time solve(monteprob_jac, Rodas5(), EnsembleGPUArray(backend), dt = 0.1, - trajectories = 10, - saveat = 1.0f0) @time solve(monteprob_jac, TRBDF2(), EnsembleCPUArray(), dt = 0.1, trajectories = 10, saveat = 1.0f0) -@time solve(monteprob_jac, TRBDF2(), EnsembleGPUArray(backend), dt = 0.1, - trajectories = 10, - saveat = 1.0f0) + +if GROUP == "CUDA" + @time solve(monteprob_jac, Rodas5(), EnsembleGPUArray(backend), dt = 0.1, + trajectories = 10, + saveat = 1.0f0) + @time solve(monteprob_jac, TRBDF2(), EnsembleGPUArray(backend), dt = 0.1, + trajectories = 10, + saveat = 1.0f0) +end @info "Callbacks" @@ -185,18 +186,19 @@ sol = solve(rober_prob, Rodas5(), abstol = 1.0f-8, reltol = 1.0f-8) sol = solve(rober_prob, TRBDF2(), abstol = 1.0f-4, reltol = 1.0f-1) rober_monteprob = EnsembleProblem(rober_prob, prob_func = prob_func) -# TODO: Does not work with Linearsolve.jl 
v1.35.0 https://github.com/SciML/DiffEqGPU.jl/pull/229 +if GROUP == "CUDA" + @time sol = solve(rober_monteprob, Rodas5(), + EnsembleGPUArray(backend), trajectories = 10, + saveat = 1.0f0, + abstol = 1.0f-8, + reltol = 1.0f-8) + @time sol = solve(rober_monteprob, TRBDF2(), + EnsembleGPUArray(backend), trajectories = 10, + saveat = 1.0f0, + abstol = 1.0f-4, + reltol = 1.0f-1) +end -@time sol = solve(rober_monteprob, Rodas5(), - EnsembleGPUArray(backend), trajectories = 10, - saveat = 1.0f0, - abstol = 1.0f-8, - reltol = 1.0f-8) -@time sol = solve(rober_monteprob, TRBDF2(), - EnsembleGPUArray(backend), trajectories = 10, - saveat = 1.0f0, - abstol = 1.0f-4, - reltol = 1.0f-1) @time sol = solve(rober_monteprob, TRBDF2(), EnsembleThreads(), trajectories = 10, abstol = 1e-4, reltol = 1e-1, saveat = 1.0f0) @@ -243,5 +245,8 @@ monteprob = EnsembleProblem(prob_jac, sol = solve(monteprob, Tsit5(), EnsembleGPUArray(backend, 0.0), trajectories = 10, adaptive = false, dt = 0.01f0, save_everystep = false) -sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend, 0.0), trajectories = 10, - adaptive = false, dt = 0.01f0, save_everystep = false) +if GROUP == "CUDA" + sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend, 0.0), + trajectories = 10, + adaptive = false, dt = 0.01f0, save_everystep = false) +end diff --git a/test/ensemblegpuarray_oop.jl b/test/ensemblegpuarray_oop.jl index 37a0660e..534ba283 100644 --- a/test/ensemblegpuarray_oop.jl +++ b/test/ensemblegpuarray_oop.jl @@ -35,11 +35,11 @@ monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false) @time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(backend), trajectories = 10_000, saveat = 1.0f0) -GROUP == "AMDGPU" && return - -@time sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend), - trajectories = 10_000, - saveat = 1.0f0) -@time sol = solve(monteprob, TRBDF2(), EnsembleGPUArray(backend), - trajectories = 10_000, - saveat = 1.0f0) +if GROUP == "CUDA" + @time sol = 
solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend), + trajectories = 10_000, + saveat = 1.0f0) + @time sol = solve(monteprob, TRBDF2(), EnsembleGPUArray(backend), + trajectories = 10_000, + saveat = 1.0f0) +end From 56ba673cc14cd45caa501e49a5aa8f5efcfd03ef Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Tue, 16 Jan 2024 00:08:03 -0500 Subject: [PATCH 6/7] Update distributed_multi_gpu.jl --- test/distributed_multi_gpu.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/distributed_multi_gpu.jl b/test/distributed_multi_gpu.jl index 17df474f..f6ee675d 100644 --- a/test/distributed_multi_gpu.jl +++ b/test/distributed_multi_gpu.jl @@ -9,7 +9,6 @@ addprocs(2) du[2] = u[1] * (p[2] - u[3]) - u[2] du[3] = u[1] * u[2] - p[3] * u[3] end - CUDA.allowscalar(false) u0 = Float32[1.0; 0.0; 0.0] tspan = (0.0f0, 100.0f0) p = (10.0f0, 28.0f0, 8 / 3.0f0) From 579d106c263dd76eaf1255d19f76015ba59bdaed Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Tue, 16 Jan 2024 00:17:05 -0500 Subject: [PATCH 7/7] Update lower_level_api.jl --- test/lower_level_api.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lower_level_api.jl b/test/lower_level_api.jl index d472b470..2400cc3f 100644 --- a/test/lower_level_api.jl +++ b/test/lower_level_api.jl @@ -1,4 +1,4 @@ -using DiffEqGPU, StaticArrays, CUDA, Adapt, OrdinaryDiffEq +using DiffEqGPU, StaticArrays, Adapt, OrdinaryDiffEq include("utils.jl")