From 648ce8d0a7494a12b178525774ce352803b59f03 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Mon, 23 Sep 2024 21:39:01 -1000 Subject: [PATCH] Complete 2D --- src/TrixiCUDA.jl | 5 +-- src/solvers/cache.jl | 83 +++++++++++++++++++++++++++++++++++++------- src/solvers/dg_2d.jl | 24 ++++++------- test/test_script.jl | 49 +++++++++++++++----------- 4 files changed, 114 insertions(+), 47 deletions(-) diff --git a/src/TrixiCUDA.jl b/src/TrixiCUDA.jl index ad75d43..57c87ef 100644 --- a/src/TrixiCUDA.jl +++ b/src/TrixiCUDA.jl @@ -11,8 +11,9 @@ using Trixi: AbstractEquations, True, False, TreeMesh, DGSEM, BoundaryConditionPeriodic, SemidiscretizationHyperbolic, VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG, - flux, ntuple, nvariables, nnodes, nelements, - local_leaf_cells, init_elements, init_interfaces, init_boundaries, + LobattoLegendreMortarL2, + flux, ntuple, nvariables, nnodes, nelements, nmortars, + local_leaf_cells, init_elements, init_interfaces, init_boundaries, init_mortars, wrap_array, compute_coefficients, have_nonconservative_terms, boundary_condition_periodic, digest_boundary_conditions, check_periodicity_mesh_boundary_conditions, diff --git a/src/solvers/cache.jl b/src/solvers/cache.jl index e09c6d5..7e4b244 100644 --- a/src/solvers/cache.jl +++ b/src/solvers/cache.jl @@ -1,11 +1,19 @@ # Rewrite `create_cache` in Trixi.jl to add the specialized parts of the arrays # required to copy the arrays from CPU to GPU. -# See also `create_cache` in Trixi.jl +# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently +# experimental in Trixi.jl and it is not implemented here. + +# Create cache for general tree mesh +function create_cache_gpu(mesh, equations, + volume_integral::VolumeIntegralWeakForm, dg::DGSEM, + uEltype, cache) + NamedTuple() +end + +# Create cache specialized for 1D tree mesh function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltype) # Get cells for which an element needs to be created (i.e. all leaf cells) - - ### ALL COPY TO GPU!!! leaf_cell_ids = local_leaf_cells(mesh.tree) elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) @@ -23,16 +31,6 @@ function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltyp return cache end -# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently experimental -# in Trixi.jl and it is not implemented here. - -# Create cache for 1D tree mesh -function create_cache_gpu(mesh::TreeMesh{1}, equations, - volume_integral::VolumeIntegralWeakForm, dg::DGSEM, - uEltype, cache) - NamedTuple() -end - function create_cache_gpu(mesh::TreeMesh{1}, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, uEltype, cache) @@ -52,3 +50,62 @@ function create_cache_gpu(mesh::TreeMesh{1}, equations, # Remove `element_ids_dg` and `element_ids_dgfv` here return (; cache..., fstar1_L, fstar1_R) end + +# Create cache specialized for 2D tree mesh +function create_cache_gpu(mesh::TreeMesh{2}, equations, + dg::DGSEM, RealT, uEltype) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) + + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + + cache = (; elements, interfaces, boundaries, mortars) + + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...) + cache = (; cache..., create_cache_gpu(mesh, equations, dg.mortar, uEltype, cache)...) + + return cache +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + uEltype, cache) + NamedTuple() +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + uEltype, cache) + fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nelements(cache.elements)) + fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nelements(cache.elements)) + fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nelements(cache.elements)) + fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nelements(cache.elements)) + + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype, cache) + + return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R) +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, uEltype, cache) + fstar_upper = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nmortars(cache.mortars)) + fstar_lower = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nmortars(cache.mortars)) + + (; fstar_upper, fstar_lower) +end diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 9c35f76..bb3eb90 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -1144,10 +1144,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: size(u, 4)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4)) @@ -1231,10 +1231,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: size(u, 4)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R 
size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4)) @@ -1594,8 +1594,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) - fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) - fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_upper = cache.fstar_upper + fstar_lower = cache.fstar_lower size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) @@ -1645,8 +1645,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) - fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) - fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_upper = cache.fstar_upper + fstar_lower = cache.fstar_lower size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) diff --git a/test/test_script.jl b/test/test_script.jl index 478d612..b58eb55 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -1,26 +1,18 @@ include("test_trixicuda.jl") -equations = CompressibleEulerEquations1D(1.4) - -initial_condition = initial_condition_weak_blast_wave - -surface_flux = flux_lax_friedrichs -volume_flux = flux_shima_etal -basis = LobattoLegendreBasis(3) -indicator_sc = IndicatorHennemannGassner(equations, basis, - alpha_max = 0.5, - alpha_min = 0.001, - alpha_smooth = true, - variable = density_pressure) -volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; - volume_flux_dg = volume_flux, - volume_flux_fv = surface_flux) -solver = DGSEM(basis, surface_flux, volume_integral) - -coordinates_min = -2.0 -coordinates_max = 2.0 +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +initial_condition = initial_condition_convergence_test +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0) +coordinates_max = (1.0, 1.0) +refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0), + coordinates_max = (1.0, 1.0)),) mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level = 5, + initial_refinement_level = 2, + refinement_patches = refinement_patches, n_cells_max = 10_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -89,6 +81,23 @@ Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, solver.surface_integral, solver) @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values +# Test `cuda_prolong2mortars!` +TrixiCUDA.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), + solver_gpu, cache_gpu) +Trixi.prolong2mortars!(cache, u, mesh, equations, + solver.mortar, solver.surface_integral, solver) +@test_approx cache.mortars.u_upper ≈ cache_gpu.mortars.u_upper +@test_approx cache.mortars.u_lower ≈ cache_gpu.mortars.u_lower + +# Test `cuda_mortar_flux!` +TrixiCUDA.cuda_mortar_flux!(mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), 
equations, + solver.mortar, solver.surface_integral, solver, cache) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + # Test `cuda_surface_integral!` TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
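
Note on the cache.jl changes above: `create_cache_gpu` follows a dispatch-and-merge pattern. Each method is specialized on the volume-integral (or mortar) type, returns a NamedTuple of extra GPU buffers (or an empty NamedTuple for `VolumeIntegralWeakForm`), and the result is spliced into the existing cache. A minimal, dependency-free sketch of that pattern; the names `WeakFormLike`, `ShockCapturingLike`, and `extra_cache` are placeholders, not Trixi or TrixiCUDA types:

# Placeholder types standing in for Trixi's volume integral structs.
struct WeakFormLike end
struct ShockCapturingLike end

# The weak-form method contributes nothing; the shock-capturing method returns
# extra work buffers (a CPU `zeros` stands in for `CUDA.zeros` here).
extra_cache(::WeakFormLike) = NamedTuple()
extra_cache(::ShockCapturingLike) = (; fstar1_L = zeros(2, 3, 2, 4))

cache = (; elements = :elements, interfaces = :interfaces)
cache = (; cache..., extra_cache(ShockCapturingLike())...)

@assert haskey(cache, :fstar1_L)   # the buffer was spliced into the cache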
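
Note on the dg_2d.jl hunks: they replace per-call `zero(CuArray{Float64}(undef, ...))` allocations of the `fstar*` and mortar buffers with reads from the cache created above. A rough sketch of the allocation pattern this presumably avoids, using CUDA.jl only; `nvars`, `nnod`, `nelem`, `rhs_alloc`, and `rhs_reuse!` are made-up names and the timings depend on hardware:

using CUDA

# Hypothetical sizes: 4 variables, 4 nodes per direction, 1024 elements.
nvars, nnod, nelem = 4, 4, 1024

# What the patch now does once, in `create_cache_gpu`:
fstar1_L = CUDA.zeros(Float64, nvars, nnod + 1, nnod, nelem)

# Old per-call pattern: allocate and zero a fresh buffer on every RHS evaluation.
rhs_alloc() = zero(CuArray{Float64}(undef, nvars, nnod + 1, nnod, nelem))

# New per-call pattern: reset and reuse the cached buffer.
rhs_reuse!(buf) = fill!(buf, zero(eltype(buf)))

CUDA.@time for _ in 1:100
    rhs_alloc()
end
CUDA.@time for _ in 1:100
    rhs_reuse!(fstar1_L)
end

Keeping these buffers in the cache also ties their lifetime to the semidiscretization rather than to each right-hand-side call.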
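
Note on the test_script.jl changes: switching from the uniformly refined 1D shock-capturing case to a 2D advection setup with a `refinement_patches` box is what makes the new mortar kernels reachable; a uniformly refined `TreeMesh` has no mortars, so the `cuda_prolong2mortars!` and `cuda_mortar_flux!` comparisons would have nothing to exercise. A minimal CPU-only sketch (Trixi.jl only; the GPU-side objects such as `cache_gpu` come from the unchanged parts of the test harness) confirming that the patched mesh actually contains mortars:

using Trixi

advection_velocity = (0.2, -0.7)
equations = LinearScalarAdvectionEquation2D(advection_velocity)
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)

# The refined box on the right half introduces hanging nodes, hence L2 mortars.
refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0),
                       coordinates_max = (1.0, 1.0)),)
mesh = TreeMesh((-1.0, -1.0), (1.0, 1.0),
                initial_refinement_level = 2,
                refinement_patches = refinement_patches,
                n_cells_max = 10_000)

semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_convergence_test, solver)

# Zero here would mean the mortar kernels are never exercised by the tests.
@show Trixi.nmortars(semi.cache.mortars)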