Complete 3D

trixi-gpu · Sep 24, 2024 · ef92447 · ef92447
1 parent 648ce8d
commit ef92447
Show file tree

Hide file tree

Showing 3 changed files with 120 additions and 61 deletions.
diff --git a/src/solvers/cache.jl b/src/solvers/cache.jl
@@ -93,9 +93,9 @@ function create_cache_gpu(mesh::TreeMesh{2}, equations,
     fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
                           nelements(cache.elements))
 
-    cache = create_cache(mesh, equations,
-                         VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
-                         dg, uEltype, cache)
+    cache = create_cache_gpu(mesh, equations,
+                             VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
+                             dg, uEltype, cache)
 
     return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R)
 end
@@ -109,3 +109,71 @@ function create_cache_gpu(mesh::TreeMesh{2}, equations,
 
     (; fstar_upper, fstar_lower)
 end
+
+# Create cache specialized for 3D tree mesh
+function create_cache_gpu(mesh::TreeMesh{3}, equations,
+                          dg::DGSEM, RealT, uEltype)
+    # Get cells for which an element needs to be created (i.e. all leaf cells)
+    leaf_cell_ids = local_leaf_cells(mesh.tree)
+
+    elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype)
+
+    interfaces = init_interfaces(leaf_cell_ids, mesh, elements)
+
+    boundaries = init_boundaries(leaf_cell_ids, mesh, elements)
+
+    mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar)
+
+    cache = (; elements, interfaces, boundaries, mortars)
+
+    # Add specialized parts of the cache required to compute the volume integral etc.
+    cache = (; cache...,
+             create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...)
+    cache = (; cache..., create_cache_gpu(mesh, equations, dg.mortar, uEltype, cache)...)
+
+    return cache
+end
+
+function create_cache_gpu(mesh::TreeMesh{3}, equations,
+                          volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM,
+                          uEltype, cache)
+    NamedTuple()
+end
+
+function create_cache_gpu(mesh::TreeMesh{3}, equations,
+                          volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM,
+                          uEltype, cache)
+    fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
+                          nnodes(dg), nelements(cache.elements))
+    fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
+                          nnodes(dg), nelements(cache.elements))
+    fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
+                          nnodes(dg), nelements(cache.elements))
+    fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
+                          nnodes(dg), nelements(cache.elements))
+    fstar3_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg),
+                          nnodes(dg) + 1, nelements(cache.elements))
+    fstar3_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg),
+                          nnodes(dg) + 1, nelements(cache.elements))
+
+    cache = create_cache_gpu(mesh, equations,
+                             VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
+                             dg, uEltype, cache)
+
+    return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R)
+end
+
+function create_cache_gpu(mesh::TreeMesh{3}, equations,
+                          mortar_l2::LobattoLegendreMortarL2, uEltype, cache)
+    fstar_upper_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
+                                  nnodes(mortar_l2), nmortars(cache.mortars))
+    fstar_upper_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
+                                   nnodes(mortar_l2), nmortars(cache.mortars))
+    fstar_lower_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
+                                  nnodes(mortar_l2), nmortars(cache.mortars))
+    fstar_lower_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
+                                   nnodes(mortar_l2), nmortars(cache.mortars))
+
+    # Temporary arrays can also be created here
+    (; fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right)
+end
diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl
@@ -1472,18 +1472,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
                                         size(u, 2), size(u, 5))
 
     inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
-    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
-                                     size(u, 5)))
-    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
-                                     size(u, 5)))
-    fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
-                                     size(u, 5)))
-    fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
-                                     size(u, 5)))
-    fstar3_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
-                                     size(u, 5)))
-    fstar3_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
-                                     size(u, 5)))
+    fstar1_L = cache.fstar1_L
+    fstar1_R = cache.fstar1_R
+    fstar2_L = cache.fstar2_L
+    fstar2_R = cache.fstar2_R
+    fstar3_L = cache.fstar3_L
+    fstar3_R = cache.fstar3_R
 
     size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5))
 
@@ -1576,18 +1570,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
                                          size(u, 2), size(u, 5))
 
     inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
-    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
-                                     size(u, 5)))
-    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2),
-                                     size(u, 5)))
-    fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
-                                     size(u, 5)))
-    fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2),
-                                     size(u, 5)))
-    fstar3_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
-                                     size(u, 5)))
-    fstar3_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1,
-                                     size(u, 5)))
+    fstar1_L = cache.fstar1_L
+    fstar1_R = cache.fstar1_R
+    fstar2_L = cache.fstar2_L
+    fstar2_R = cache.fstar2_R
+    fstar3_L = cache.fstar3_L
+    fstar3_R = cache.fstar3_R
 
     size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5))
 
@@ -2007,14 +1995,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati
     surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)
     tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero
 
-    fstar_upper_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                        size(u_upper_left, 4), length(orientations))
-    fstar_upper_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                         size(u_upper_left, 4), length(orientations))
-    fstar_lower_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                        size(u_upper_left, 4), length(orientations))
-    fstar_lower_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                         size(u_upper_left, 4), length(orientations))
+    fstar_upper_left = cache.fstar_upper_left
+    fstar_upper_right = cache.fstar_upper_right
+    fstar_lower_left = cache.fstar_lower_left
+    fstar_lower_right = cache.fstar_lower_right
 
     size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4),
                                 length(orientations))
@@ -2099,14 +2083,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati
     surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)
     tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero
 
-    fstar_upper_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                        size(u_upper_left, 4), length(orientations))
-    fstar_upper_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                         size(u_upper_left, 4), length(orientations))
-    fstar_lower_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                        size(u_upper_left, 4), length(orientations))
-    fstar_lower_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3),
-                                         size(u_upper_left, 4), length(orientations))
+    fstar_upper_left = cache.fstar_upper_left
+    fstar_upper_right = cache.fstar_upper_right
+    fstar_lower_left = cache.fstar_lower_left
+    fstar_lower_right = cache.fstar_lower_right
 
     size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4),
                                 length(orientations))

diff --git a/test/test_script.jl b/test/test_script.jl
@@ -1,24 +1,34 @@
 include("test_trixicuda.jl")
 
-advection_velocity = (0.2, -0.7)
-equations = LinearScalarAdvectionEquation2D(advection_velocity)
-
-initial_condition = initial_condition_convergence_test
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
-
-coordinates_min = (-1.0, -1.0)
-coordinates_max = (1.0, 1.0)
-refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0),
-                       coordinates_max = (1.0, 1.0)),)
+equations = CompressibleEulerEquations3D(1.4)
+
+initial_condition = initial_condition_weak_blast_wave
+
+surface_flux = flux_ranocha # OBS! Using a non-dissipative flux is only sensible to test EC,
+# but not for real shock simulations
+volume_flux = flux_ranocha
+polydeg = 3
+basis = LobattoLegendreBasis(polydeg)
+indicator_sc = IndicatorHennemannGassner(equations, basis,
+                                         alpha_max = 0.5,
+                                         alpha_min = 0.001,
+                                         alpha_smooth = true,
+                                         variable = density_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg = volume_flux,
+                                                 volume_flux_fv = surface_flux)
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+coordinates_min = (-2.0, -2.0, -2.0)
+coordinates_max = (2.0, 2.0, 2.0)
 mesh = TreeMesh(coordinates_min, coordinates_max,
-                initial_refinement_level = 2,
-                refinement_patches = refinement_patches,
-                n_cells_max = 10_000)
+                initial_refinement_level = 3,
+                n_cells_max = 100_000)
 
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
 semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
 
-tspan = (0.0, 1.0)
+tspan = (0.0, 5.0)
 
 # Get CPU data
 (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi
@@ -74,8 +84,7 @@ Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, so
 
 # Test `cuda_boundary_flux!`
 TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
-                              Trixi.have_nonconservative_terms(equations_gpu),
-                              equations_gpu,
+                              Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
                               solver_gpu, cache_gpu)
 Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
                           solver.surface_integral, solver)
@@ -86,8 +95,10 @@ TrixiCUDA.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiCUDA.check_cache_mortars(c
                                 solver_gpu, cache_gpu)
 Trixi.prolong2mortars!(cache, u, mesh, equations,
                        solver.mortar, solver.surface_integral, solver)
-@test_approx cache.mortars.u_upper ≈ cache_gpu.mortars.u_upper
-@test_approx cache.mortars.u_lower ≈ cache_gpu.mortars.u_lower
+@test_approx cache_gpu.mortars.u_upper_left ≈ cache.mortars.u_upper_left
+@test_approx cache_gpu.mortars.u_upper_right ≈ cache.mortars.u_upper_right
+@test_approx cache_gpu.mortars.u_lower_left ≈ cache.mortars.u_lower_left
+@test_approx cache_gpu.mortars.u_lower_right ≈ cache.mortars.u_lower_right
 
 # Test `cuda_mortar_flux!`
 TrixiCUDA.cuda_mortar_flux!(mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu),