Skip to content

Commit

Permalink
Complete 3D
Browse files Browse the repository at this point in the history
  • Loading branch information
huiyuxie committed Sep 24, 2024
1 parent 648ce8d commit ef92447
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 61 deletions.
74 changes: 71 additions & 3 deletions src/solvers/cache.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ function create_cache_gpu(mesh::TreeMesh{2}, equations,
fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
nelements(cache.elements))

cache = create_cache(mesh, equations,
VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
dg, uEltype, cache)
cache = create_cache_gpu(mesh, equations,
VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
dg, uEltype, cache)

return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R)
end
Expand All @@ -109,3 +109,71 @@ function create_cache_gpu(mesh::TreeMesh{2}, equations,

(; fstar_upper, fstar_lower)
end

# Create cache specialized for 3D tree mesh
function create_cache_gpu(mesh::TreeMesh{3}, equations,
dg::DGSEM, RealT, uEltype)
# Get cells for which an element needs to be created (i.e. all leaf cells)
leaf_cell_ids = local_leaf_cells(mesh.tree)

elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype)

interfaces = init_interfaces(leaf_cell_ids, mesh, elements)

boundaries = init_boundaries(leaf_cell_ids, mesh, elements)

mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar)

cache = (; elements, interfaces, boundaries, mortars)

# Add specialized parts of the cache required to compute the volume integral etc.
cache = (; cache...,
create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...)
cache = (; cache..., create_cache_gpu(mesh, equations, dg.mortar, uEltype, cache)...)

return cache
end

function create_cache_gpu(mesh::TreeMesh{3}, equations,
volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM,
uEltype, cache)
NamedTuple()
end

function create_cache_gpu(mesh::TreeMesh{3}, equations,
volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM,
uEltype, cache)
fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
nnodes(dg), nelements(cache.elements))
fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
nnodes(dg), nelements(cache.elements))
fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
nnodes(dg), nelements(cache.elements))
fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
nnodes(dg), nelements(cache.elements))
fstar3_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg),
nnodes(dg) + 1, nelements(cache.elements))
fstar3_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg),
nnodes(dg) + 1, nelements(cache.elements))

cache = create_cache_gpu(mesh, equations,
VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
dg, uEltype, cache)

return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R)
end

function create_cache_gpu(mesh::TreeMesh{3}, equations,
mortar_l2::LobattoLegendreMortarL2, uEltype, cache)
fstar_upper_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_upper_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_lower_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_lower_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))

# Temporary arrays can also be created here
(; fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right)
end
60 changes: 20 additions & 40 deletions src/solvers/dg_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1472,18 +1472,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
size(u, 2), size(u, 5))

inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
size(u, 5)))
fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
size(u, 5)))
fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
size(u, 5)))
fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
size(u, 5)))
fstar3_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
size(u, 5)))
fstar3_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
size(u, 5)))
fstar1_L = cache.fstar1_L
fstar1_R = cache.fstar1_R
fstar2_L = cache.fstar2_L
fstar2_R = cache.fstar2_R
fstar3_L = cache.fstar3_L
fstar3_R = cache.fstar3_R

size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5))

Expand Down Expand Up @@ -1576,18 +1570,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
size(u, 2), size(u, 5))

inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
size(u, 5)))
fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
size(u, 5)))
fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
size(u, 5)))
fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
size(u, 5)))
fstar3_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
size(u, 5)))
fstar3_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
size(u, 5)))
fstar1_L = cache.fstar1_L
fstar1_R = cache.fstar1_R
fstar2_L = cache.fstar2_L
fstar2_R = cache.fstar2_R
fstar3_L = cache.fstar3_L
fstar3_R = cache.fstar3_R

size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5))

Expand Down Expand Up @@ -2007,14 +1995,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati
surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)
tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero

fstar_upper_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_upper_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_lower_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_lower_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_upper_left = cache.fstar_upper_left
fstar_upper_right = cache.fstar_upper_right
fstar_lower_left = cache.fstar_lower_left
fstar_lower_right = cache.fstar_lower_right

size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4),
length(orientations))
Expand Down Expand Up @@ -2099,14 +2083,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati
surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)
tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero

fstar_upper_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_upper_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_lower_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_lower_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
size(u_upper_left, 4), length(orientations))
fstar_upper_left = cache.fstar_upper_left
fstar_upper_right = cache.fstar_upper_right
fstar_lower_left = cache.fstar_lower_left
fstar_lower_right = cache.fstar_lower_right

size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4),
length(orientations))
Expand Down
47 changes: 29 additions & 18 deletions test/test_script.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
include("test_trixicuda.jl")

advection_velocity = (0.2, -0.7)
equations = LinearScalarAdvectionEquation2D(advection_velocity)

initial_condition = initial_condition_convergence_test
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)

coordinates_min = (-1.0, -1.0)
coordinates_max = (1.0, 1.0)
refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0),
coordinates_max = (1.0, 1.0)),)
equations = CompressibleEulerEquations3D(1.4)

initial_condition = initial_condition_weak_blast_wave

surface_flux = flux_ranocha # OBS! Using a non-dissipative flux is only sensible to test EC,
# but not for real shock simulations
volume_flux = flux_ranocha
polydeg = 3
basis = LobattoLegendreBasis(polydeg)
indicator_sc = IndicatorHennemannGassner(equations, basis,
alpha_max = 0.5,
alpha_min = 0.001,
alpha_smooth = true,
variable = density_pressure)
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
volume_flux_dg = volume_flux,
volume_flux_fv = surface_flux)
solver = DGSEM(basis, surface_flux, volume_integral)

coordinates_min = (-2.0, -2.0, -2.0)
coordinates_max = (2.0, 2.0, 2.0)
mesh = TreeMesh(coordinates_min, coordinates_max,
initial_refinement_level = 2,
refinement_patches = refinement_patches,
n_cells_max = 10_000)
initial_refinement_level = 3,
n_cells_max = 100_000)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)

tspan = (0.0, 1.0)
tspan = (0.0, 5.0)

# Get CPU data
(; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi
Expand Down Expand Up @@ -74,8 +84,7 @@ Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, so

# Test `cuda_boundary_flux!`
TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
Trixi.have_nonconservative_terms(equations_gpu),
equations_gpu,
Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
solver_gpu, cache_gpu)
Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
solver.surface_integral, solver)
Expand All @@ -86,8 +95,10 @@ TrixiCUDA.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiCUDA.check_cache_mortars(c
solver_gpu, cache_gpu)
Trixi.prolong2mortars!(cache, u, mesh, equations,
solver.mortar, solver.surface_integral, solver)
@test_approx cache.mortars.u_upper cache_gpu.mortars.u_upper
@test_approx cache.mortars.u_lower cache_gpu.mortars.u_lower
@test_approx cache_gpu.mortars.u_upper_left cache.mortars.u_upper_left
@test_approx cache_gpu.mortars.u_upper_right cache.mortars.u_upper_right
@test_approx cache_gpu.mortars.u_lower_left cache.mortars.u_lower_left
@test_approx cache_gpu.mortars.u_lower_right cache.mortars.u_lower_right

# Test `cuda_mortar_flux!`
TrixiCUDA.cuda_mortar_flux!(mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu),
Expand Down

0 comments on commit ef92447

Please sign in to comment.