Skip to content

Commit

Permalink
Refactor tests based on new GPU cache (#60)
Browse files Browse the repository at this point in the history
* Start

* Minor fix

* Add tests

* Fix errors from upstream

* Add tests

* Add tests

* Add tests
  • Loading branch information
huiyuxie authored Sep 30, 2024
1 parent 749ea41 commit 699f513
Show file tree
Hide file tree
Showing 77 changed files with 7,777 additions and 4,887 deletions.
3 changes: 2 additions & 1 deletion src/TrixiCUDA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ using Trixi: AbstractEquations, AbstractContainer,
L2MortarContainer2D, L2MortarContainer3D,
True, False,
TreeMesh, DGSEM,
BoundaryConditionPeriodic, SemidiscretizationHyperbolic,
SemidiscretizationHyperbolic,
BoundaryConditionPeriodic, BoundaryConditionDirichlet,
VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG,
LobattoLegendreMortarL2,
flux, ntuple, nvariables, nnodes, nelements, nmortars,
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Copy data from CPU to GPU
function copy_to_gpu!(du::PtrArray, u::PtrArray)
du = CuArray{Float64}(zero(du))
du = CUDA.zeros(Float64, size(du)...)
u = CuArray{Float64}(u)

return (du, u)
Expand Down
1 change: 0 additions & 1 deletion src/solvers/containers_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ mutable struct ElementContainerGPU3D{RealT <: Real, uEltype <: Real} <: Abstract
surface_flux_values::CuArray{uEltype, 5}
cell_ids::CuArray{Int, 1}

# Inner constructor
# Inner constructor
function ElementContainerGPU3D{RealT, uEltype}(dims_inverse_jacobian::NTuple{1, Int},
dims_node_coordinates::NTuple{5, Int},
Expand Down
29 changes: 15 additions & 14 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dg = CUDA.zeros(Int, length(alpha))
element_ids_dgfv = CUDA.zeros(Int, length(alpha))

pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
element_ids_dgfv,
Expand Down Expand Up @@ -773,8 +773,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dg = CUDA.zeros(Int, length(alpha))
element_ids_dgfv = CUDA.zeros(Int, length(alpha))

pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
element_ids_dgfv,
Expand Down Expand Up @@ -966,14 +966,15 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup
lasts = zero(n_boundaries_per_direction)
firsts = zero(n_boundaries_per_direction)

# Note: this extra kernel launch may introduce launch overhead
last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts,
n_boundaries_per_direction)
last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction;
configurator_1d(last_first_indices_kernel, lasts)...)

lasts, firsts = Array(lasts), Array(firsts)
boundary_arr = CuArray{Int64}(firsts[1]:lasts[2])
indices_arr = CuArray{Int64}([firsts[1], firsts[2]])
indices_arr = firsts
boundary_arr = CuArray{Int}(Array(firsts)[1]:Array(lasts)[end])

boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)

boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values,
Expand Down Expand Up @@ -1009,30 +1010,30 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup
lasts = zero(n_boundaries_per_direction)
firsts = zero(n_boundaries_per_direction)

# Note: this extra kernel launch may introduce launch overhead
last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts,
n_boundaries_per_direction)
last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction;
configurator_1d(last_first_indices_kernel, lasts)...)

lasts, firsts = Array(lasts), Array(firsts)
boundary_arr = CuArray{Int64}(firsts[1]:lasts[2])
indices_arr = CuArray{Int64}([firsts[1], firsts[2]])
indices_arr = firsts
boundary_arr = CuArray{Int}(Array(firsts)[1]:Array(lasts)[end])

# Replace with callable functions (not necessary here)
# boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)
boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)

boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values,
boundaries_u, node_coordinates,
t, boundary_arr, indices_arr,
neighbor_ids, neighbor_sides,
orientations,
boundary_conditions,
boundary_conditions_callable,
equations,
surface_flux,
nonconservative_flux)
boundary_flux_kernel(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr,
indices_arr, neighbor_ids, neighbor_sides, orientations,
boundary_conditions, equations, surface_flux, nonconservative_flux;
boundary_conditions_callable, equations, surface_flux,
nonconservative_flux;
configurator_1d(boundary_flux_kernel, boundary_arr)...)

return nothing
Expand Down
33 changes: 16 additions & 17 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1125,8 +1125,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dg = CUDA.zeros(Int, length(alpha))
element_ids_dgfv = CUDA.zeros(Int, length(alpha))

pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
element_ids_dgfv,
Expand Down Expand Up @@ -1209,8 +1209,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dg = CUDA.zeros(Int, length(alpha))
element_ids_dgfv = CUDA.zeros(Int, length(alpha))

pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
element_ids_dgfv,
Expand Down Expand Up @@ -1440,16 +1440,16 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{2}, boundary_conditions::NamedTup
lasts = zero(n_boundaries_per_direction)
firsts = zero(n_boundaries_per_direction)

# Note: this extra kernel launch may introduce launch overhead
last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts,
n_boundaries_per_direction)
last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction;
configurator_1d(last_first_indices_kernel, lasts)...)

lasts, firsts = Array(lasts), Array(firsts)
boundary_arr = CuArray{Int64}(firsts[1]:lasts[4])
indices_arr = CuArray{Int64}([firsts[1], firsts[2], firsts[3], firsts[4]])
boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)
indices_arr = firsts
boundary_arr = CuArray{Int}(Array(firsts)[1]:Array(lasts)[end])

boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)
size_arr = CuArray{Float64}(undef, size(surface_flux_values, 2), length(boundary_arr))

boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values,
Expand Down Expand Up @@ -1485,32 +1485,31 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{2}, boundary_conditions::NamedTup
lasts = zero(n_boundaries_per_direction)
firsts = zero(n_boundaries_per_direction)

# Note: this extra kernel launch may introduce launch overhead
last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts,
n_boundaries_per_direction)
last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction;
configurator_1d(last_first_indices_kernel, lasts)...)

lasts, firsts = Array(lasts), Array(firsts)
boundary_arr = CuArray{Int64}(firsts[1]:lasts[4])
indices_arr = CuArray{Int64}([firsts[1], firsts[2], firsts[3], firsts[4]])

# Replace with callable functions (not necessary here)
# boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)
indices_arr = firsts
boundary_arr = CuArray{Int}(Array(firsts)[1]:Array(lasts)[end])

boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)
size_arr = CuArray{Float64}(undef, size(surface_flux_values, 2), length(boundary_arr))

boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values,
boundaries_u, node_coordinates,
t, boundary_arr, indices_arr,
neighbor_ids, neighbor_sides,
orientations,
boundary_conditions,
boundary_conditions_callable,
equations,
surface_flux,
nonconservative_flux)
boundary_flux_kernel(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr,
indices_arr, neighbor_ids, neighbor_sides, orientations,
boundary_conditions, equations, surface_flux, nonconservative_flux;
boundary_conditions_callable, equations, surface_flux,
nonconservative_flux;
configurator_2d(boundary_flux_kernel, size_arr)...)

return nothing
Expand Down Expand Up @@ -1577,7 +1576,7 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati
large_sides = cache.mortars.large_sides
orientations = cache.mortars.orientations

#
# The original CPU arrays hold NaNs
u_upper = cache.mortars.u_upper
u_lower = cache.mortars.u_lower
reverse_upper = CuArray{Float64}(dg.mortar.reverse_upper)
Expand Down
Loading

0 comments on commit 699f513

Please sign in to comment.