Skip to content

Commit

Permalink
Merge pull request #322 from jpsamaroo/jps/amdgpu-hip
Browse files Browse the repository at this point in the history
Updates for HIP-based AMDGPU
  • Loading branch information
utkarsh530 authored Jan 16, 2024
2 parents 7971f09 + 579d106 commit 0f0d856
Show file tree
Hide file tree
Showing 12 changed files with 60 additions and 44 deletions.
15 changes: 9 additions & 6 deletions .buildkite/runtests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ steps:
setup:
version:
- "1"
- "1.9"
env:
GROUP: CUDA
plugins:
Expand Down Expand Up @@ -31,7 +32,7 @@ steps:
setup:
version:
# Known issues on 1.8
- "1.9-nightly"
- "1"
env:
GROUP: AMDGPU
plugins:
Expand Down Expand Up @@ -59,7 +60,7 @@ steps:
setup:
version:
- "1"
- "1.9-nightly"
- "1.9"
env:
GROUP: oneAPI
plugins:
Expand All @@ -69,8 +70,11 @@ steps:
- |
julia --project=test -e '
import Pkg
Pkg.add(; name = "oneAPI")'
rm test/Manifest.toml
Pkg.add(; name="oneAPI")
println("+++ :julia: Building support library")
include(joinpath(dirname(dirname(Base.find_package("oneAPI"))), "deps", "build_ci.jl"))
Pkg.activate()'
julia --project -e '
import Pkg
println("+++ :julia: Running tests")
Expand All @@ -86,8 +90,7 @@ steps:
matrix:
setup:
version:
- "1"
- "1.9-nightly"
- "1.9"
env:
GROUP: Metal
plugins:
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ MetalExt = ["Metal"]
oneAPIExt = ["oneAPI"]

[compat]
AMDGPU = "0.4.9"
AMDGPU = "0.5, 0.6, 0.7, 0.8"
Adapt = "3, 4"
CUDA = "4.1.0, 5"
ChainRulesCore = "1"
Expand Down
3 changes: 3 additions & 0 deletions ext/AMDGPUExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import DiffEqGPU
using .AMDGPU
import .AMDGPU: ROCBackend

function DiffEqGPU.EnsembleGPUArray(cpu_offload::Float64)
DiffEqGPU.EnsembleGPUArray(ROCBackend(), cpu_offload)
end
DiffEqGPU.maxthreads(::ROCBackend) = 256
DiffEqGPU.maybe_prefer_blocks(::ROCBackend) = ROCBackend()

Expand Down
2 changes: 1 addition & 1 deletion src/ensemblegpukernel/callbacks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <:
reltol::T2,
repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2,
T3, I, R,
}
}
if save_positions != (false, false)
error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.")
end
Expand Down
10 changes: 5 additions & 5 deletions src/ensemblegpukernel/integrators/integrator_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ end
IIP,
S,
T,
}
}
integrator.retcode = retcode
end

Expand Down Expand Up @@ -156,7 +156,7 @@ end
S,
T,
T1,
}
}
# Can get rid of an allocation here with a function
# get_tmp_arr(integrator.cache) which gives a pointer to some
# cache array which can be modified.
Expand Down Expand Up @@ -185,7 +185,7 @@ end
S,
T,
T1,
}
}
_change_t_via_interpolation!(integrator, t, modify_save_endpoint)
end

Expand Down Expand Up @@ -322,7 +322,7 @@ end
IIP,
S,
T,
}
}
return nothing
end

Expand All @@ -333,7 +333,7 @@ end
},
callback,
abst) where {AlgType <: GPUODEAlgorithm, IIP, S, T
}
}
if abst == integrator.t
tmp = integrator.u
elseif abst == integrator.tprev
Expand Down
2 changes: 1 addition & 1 deletion src/ensemblegpukernel/integrators/nonstiff/interpolants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
IIP,
S,
T,
}
}
y₁ = integ.u
k1 = integ.k1
k2 = integ.k2
Expand Down
4 changes: 2 additions & 2 deletions src/ensemblegpukernel/integrators/stiff/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
IIP,
S,
T,
})(t) where {
})(t) where {
AlgType <:
GPUODEAlgorithm,
IIP,
S,
T,
}
}
Θ = (t - integrator.tprev) / integrator.dt
_ode_interpolant(Θ, integrator.dt, integrator.uprev, integrator)
end
Expand Down
1 change: 0 additions & 1 deletion test/distributed_multi_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ addprocs(2)
du[2] = u[1] * (p[2] - u[3]) - u[2]
du[3] = u[1] * u[2] - p[3] * u[3]
end
CUDA.allowscalar(false)
u0 = Float32[1.0; 0.0; 0.0]
tspan = (0.0f0, 100.0f0)
p = (10.0f0, 28.0f0, 8 / 3.0f0)
Expand Down
45 changes: 26 additions & 19 deletions test/ensemblegpuarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,18 @@ monteprob_jac = EnsembleProblem(prob_jac, prob_func = prob_func)
@time solve(monteprob_jac, Rodas5(), EnsembleCPUArray(), dt = 0.1,
trajectories = 10,
saveat = 1.0f0)
@time solve(monteprob_jac, Rodas5(), EnsembleGPUArray(backend), dt = 0.1,
trajectories = 10,
saveat = 1.0f0)
@time solve(monteprob_jac, TRBDF2(), EnsembleCPUArray(), dt = 0.1,
trajectories = 10,
saveat = 1.0f0)
@time solve(monteprob_jac, TRBDF2(), EnsembleGPUArray(backend), dt = 0.1,
trajectories = 10,
saveat = 1.0f0)

if GROUP == "CUDA"
@time solve(monteprob_jac, Rodas5(), EnsembleGPUArray(backend), dt = 0.1,
trajectories = 10,
saveat = 1.0f0)
@time solve(monteprob_jac, TRBDF2(), EnsembleGPUArray(backend), dt = 0.1,
trajectories = 10,
saveat = 1.0f0)
end

@info "Callbacks"

Expand Down Expand Up @@ -183,18 +186,19 @@ sol = solve(rober_prob, Rodas5(), abstol = 1.0f-8, reltol = 1.0f-8)
sol = solve(rober_prob, TRBDF2(), abstol = 1.0f-4, reltol = 1.0f-1)
rober_monteprob = EnsembleProblem(rober_prob, prob_func = prob_func)

# TODO: Does not work with LinearSolve.jl v1.35.0 https://github.com/SciML/DiffEqGPU.jl/pull/229
if GROUP == "CUDA"
@time sol = solve(rober_monteprob, Rodas5(),
EnsembleGPUArray(backend), trajectories = 10,
saveat = 1.0f0,
abstol = 1.0f-8,
reltol = 1.0f-8)
@time sol = solve(rober_monteprob, TRBDF2(),
EnsembleGPUArray(backend), trajectories = 10,
saveat = 1.0f0,
abstol = 1.0f-4,
reltol = 1.0f-1)
end

@time sol = solve(rober_monteprob, Rodas5(),
EnsembleGPUArray(backend), trajectories = 10,
saveat = 1.0f0,
abstol = 1.0f-8,
reltol = 1.0f-8)
@time sol = solve(rober_monteprob, TRBDF2(),
EnsembleGPUArray(backend), trajectories = 10,
saveat = 1.0f0,
abstol = 1.0f-4,
reltol = 1.0f-1)
@time sol = solve(rober_monteprob, TRBDF2(), EnsembleThreads(),
trajectories = 10,
abstol = 1e-4, reltol = 1e-1, saveat = 1.0f0)
Expand Down Expand Up @@ -241,5 +245,8 @@ monteprob = EnsembleProblem(prob_jac,
sol = solve(monteprob, Tsit5(), EnsembleGPUArray(backend, 0.0), trajectories = 10,
adaptive = false, dt = 0.01f0, save_everystep = false)

sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend, 0.0), trajectories = 10,
adaptive = false, dt = 0.01f0, save_everystep = false)
if GROUP == "CUDA"
sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend, 0.0),
trajectories = 10,
adaptive = false, dt = 0.01f0, save_everystep = false)
end
15 changes: 9 additions & 6 deletions test/ensemblegpuarray_oop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@ prob_func = (prob, i, repeat) -> remake(prob, p = rand(Float32, 3) .* p)
monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
@time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(backend), trajectories = 10_000,
saveat = 1.0f0)
@time sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend),
trajectories = 10_000,
saveat = 1.0f0)
@time sol = solve(monteprob, TRBDF2(), EnsembleGPUArray(backend),
trajectories = 10_000,
saveat = 1.0f0)

if GROUP == "CUDA"
@time sol = solve(monteprob, Rosenbrock23(), EnsembleGPUArray(backend),
trajectories = 10_000,
saveat = 1.0f0)
@time sol = solve(monteprob, TRBDF2(), EnsembleGPUArray(backend),
trajectories = 10_000,
saveat = 1.0f0)
end
2 changes: 1 addition & 1 deletion test/lower_level_api.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using DiffEqGPU, StaticArrays, CUDA, Adapt, OrdinaryDiffEq
using DiffEqGPU, StaticArrays, Adapt, OrdinaryDiffEq

include("utils.jl")

Expand Down
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ if GROUP in SUPPORTS_DOUBLE_PRECISION
end
end

if GROUP == "CUDA"
# Callbacks currently error on v1.10
if GROUP == "CUDA" && VERSION <= v"1.9"
# Causes dynamic function invocation
@time @testset "GPU Kernelized Non Stiff ODE ContinuousCallback" begin
include("gpu_kernel_de/gpu_ode_continuous_callbacks.jl")
Expand Down

0 comments on commit 0f0d856

Please sign in to comment.