trixi-gpu · huiyuxie · Sep 12, 2024 · Sep 9, 2024 · Sep 11, 2024 · Sep 11, 2024
diff --git a/examples/shallowwater_dirichlet_1d.jl b/examples/shallowwater_dirichlet_1d.jl
@@ -6,20 +6,9 @@ using OrdinaryDiffEq
 ###############################################################################
 # Semidiscretization of the shallow water equations
 
-equations = ShallowWaterEquations1D(gravity_constant = 1.0, H0 = 3.0)
+equations = ShallowWaterEquations1D(gravity_constant = 9.81, H0 = 3.0)
 
-# An initial condition with constant total water height and zero velocities to test well-balancedness.
-function initial_condition_well_balancedness(x, t, equations::ShallowWaterEquations1D)
-    # Set the background values
-    H = equations.H0
-    v = 0.0
-
-    b = (1.5 / exp(0.5 * ((x[1] - 1.0)^2)) + 0.75 / exp(0.5 * ((x[1] + 1.0)^2)))
-
-    return prim2cons(SVector(H, v, b), equations)
-end
-
-initial_condition = initial_condition_well_balancedness
+initial_condition = initial_condition_convergence_test
 
 boundary_condition = BoundaryConditionDirichlet(initial_condition)
 
@@ -49,7 +38,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
 ###############################################################################
 # ODE solvers, callbacks etc.
 
-tspan = (0.0, 100.0)
+tspan = (0.0, 1.0)
 ode = semidiscretize_gpu(semi, tspan) # from TrixiGPU.jl
 
 summary_callback = SummaryCallback()

diff --git a/src/solvers/common.jl b/src/solvers/common.jl
@@ -49,7 +49,7 @@ function pure_blended_element_count_kernel!(element_ids_dg, element_ids_dgfv, al
     if (i <= length(alpha))
         dg_only = isapprox(alpha[i], 0, atol = atol)
 
-        if dg_only
+        if dg_only # bad
             element_ids_dg[i] = i
         else
             element_ids_dgfv[i] = i

diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
@@ -85,8 +85,9 @@ function symmetric_noncons_flux_kernel!(symmetric_flux_arr, noncons_flux_arr, u,
 
         @inbounds begin
             for ii in axes(u, 1)
-                symmetric_flux_arr[ii, j1, j2, k] = symmetric_flux_node[ii]
-                noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii] * derivative_split[j1, j2]
+                symmetric_flux_arr[ii, j1, j2, k] = symmetric_flux_node[ii] *
+                                                    derivative_split[j1, j2]
+                noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii]
             end
         end
     end
@@ -123,8 +124,8 @@ function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr, nonco
             integral_contribution = 0.0 # change back to `Float32`
 
             for ii in axes(du, 2)
-                du[i, j, k] += derivative_split[j, ii] * symmetric_flux_arr[i, j, ii, k]
-                integral_contribution += noncons_flux_arr[i, j, ii, k]
+                du[i, j, k] += symmetric_flux_arr[i, j, ii, k]
+                integral_contribution += derivative_split[j, ii] * noncons_flux_arr[i, j, ii, k]
             end
 
             du[i, j, k] += 0.5 * integral_contribution # change back to `Float32`
@@ -149,6 +150,9 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
 
         element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
 
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operation for better performance (to be explored)
+
         u_node = get_node_vars(u, equations, j1, k)
         u_node1 = get_node_vars(u, equations, j2, k)
 
@@ -177,9 +181,63 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
     return nothing
 end
 
+# Kernel for calculating pure DG and DG-FV volume fluxes
+function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
+                                  element_ids_dgfv, derivative_split,
+                                  equations::AbstractEquations{1},
+                                  volume_flux_dg::Any, nonconservative_flux_dg::Any,
+                                  volume_flux_fv::Any, nonconservative_flux_fv::Any)
+    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (j <= size(u, 2)^2 && k <= size(u, 3))
+        # length(element_ids_dgfv) == size(u, 3)
+
+        j1 = div(j - 1, size(u, 2)) + 1
+        j2 = rem(j - 1, size(u, 2)) + 1
+
+        element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
+
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operation for better performance (to be explored)
+
+        u_node = get_node_vars(u, equations, j1, k)
+        u_node1 = get_node_vars(u, equations, j2, k)
+
+        volume_flux_node = volume_flux_dg(u_node, u_node1, 1, equations)
+        noncons_flux_node = nonconservative_flux_dg(u_node, u_node1, 1, equations)
+
+        @inbounds begin
+            for ii in axes(u, 1)
+                volume_flux_arr[ii, j1, j2, k] = derivative_split[j1, j2] * volume_flux_node[ii]
+                noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii]
+            end
+        end
+
+        if j1 !== 1 && j2 === 1 && element_dgfv !== 0 # bad
+            u_ll = get_node_vars(u, equations, j1 - 1, element_dgfv)
+            u_rr = get_node_vars(u, equations, j1, element_dgfv)
+
+            f1_node = volume_flux_fv(u_ll, u_rr, 1, equations)
+
+            f1_L_node = nonconservative_flux_fv(u_ll, u_rr, 1, equations)
+            f1_R_node = nonconservative_flux_fv(u_rr, u_ll, 1, equations)
+
+            @inbounds begin
+                for ii in axes(u, 1)
+                    fstar1_L[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_L_node[ii]
+                    fstar1_R[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_R_node[ii]
+                end
+            end
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for calculating DG volume integral contribution
 function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
-                                    volume_flux_arr)
+                                    volume_flux_arr, equations::AbstractEquations{1})
     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
     k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
@@ -212,6 +270,53 @@ function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha,
     return nothing
 end
 
+# Kernel for calculating DG volume integral contribution
+function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
+                                    volume_flux_arr, noncons_flux_arr,
+                                    equations::AbstractEquations{1})
+    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
+
+    if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3))
+        # length(element_ids_dg) == size(du, 3)
+        # length(element_ids_dgfv) == size(du, 3)
+
+        element_dg = element_ids_dg[k] # check if `element_dg` is zero
+        element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
+        alpha_element = alpha[k]
+
+        @inbounds begin
+            if element_dg !== 0 # bad
+                integral_contribution = 0.0
+
+                for ii in axes(du, 2)
+                    du[i, j, element_dg] += volume_flux_arr[i, j, ii, element_dg]
+                    integral_contribution += derivative_split[j, ii] *
+                                             noncons_flux_arr[i, j, ii, element_dg]
+                end
+
+                du[i, j, element_dg] += 0.5 * integral_contribution
+            end
+
+            if element_dgfv !== 0 # bad
+                integral_contribution = 0.0
+
+                for ii in axes(du, 2)
+                    du[i, j, element_dgfv] += (1 - alpha_element) *
+                                              volume_flux_arr[i, j, ii, element_dgfv]
+                    integral_contribution += derivative_split[j, ii] *
+                                             noncons_flux_arr[i, j, ii, element_dgfv]
+                end
+
+                du[i, j, element_dgfv] += 0.5 * (1 - alpha_element) * integral_contribution
+            end
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for calculating FV volume integral contribution 
 function volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv,
                                     alpha)
@@ -473,7 +578,7 @@ end
 
 # Kernel for calculating source terms
 function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEquations{1},
-                              source_terms::Function)
+                              source_terms::Any)
     j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
 
@@ -627,9 +732,86 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
                                                                               element_ids_dgfv,
                                                                               alpha,
                                                                               derivative_split,
-                                                                              volume_flux_arr)
+                                                                              volume_flux_arr,
+                                                                              equations)
+    volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
+                              volume_flux_arr, equations;
+                              configurator_3d(volume_integral_dg_kernel, du)...)
+
+    size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3))
+
+    volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L,
+                                                                              fstar1_R,
+                                                                              inverse_weights,
+                                                                              element_ids_dgfv,
+                                                                              alpha)
+    volume_integral_fv_kernel(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv, alpha;
+                              configurator_2d(volume_integral_fv_kernel, size_arr)...)
+
+    return nothing
+end
+
+# Pack kernels for calculating volume integrals
+function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::True, equations,
+                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
+    volume_flux_dg, nonconservative_flux_dg = dg.volume_integral.volume_flux_dg
+    volume_flux_fv, nonconservative_flux_fv = dg.volume_integral.volume_flux_fv
+    indicator = dg.volume_integral.indicator
+
+    # TODO: Get copies of `u` and `du` on both device and host
+    alpha = indicator(Array(u), mesh, equations, dg, cache)
+    alpha = CuArray{Float64}(alpha)
+
+    # For `Float64`, this gives 1.8189894035458565e-12
+    # For `Float32`, this gives 1.1920929f-5
+    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+
+    element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
+    element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
+
+    pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
+                                                                                              element_ids_dgfv,
+                                                                                              alpha,
+                                                                                              atol)
+    pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol;
+                                      configurator_1d(pure_blended_element_count_kernel, alpha)...)
+
+    derivative_split = dg.basis.derivative_split
+    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`
+
+    derivative_split = CuArray{Float64}(derivative_split)
+    volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
+    noncons_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
+
+    inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
+    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
+    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
+
+    size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))
+
+    volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr,
+                                                                          noncons_flux_arr,
+                                                                          fstar1_L, fstar1_R, u,
+                                                                          element_ids_dgfv,
+                                                                          derivative_split,
+                                                                          equations, volume_flux_dg,
+                                                                          nonconservative_flux_dg,
+                                                                          volume_flux_fv,
+                                                                          nonconservative_flux_fv)
+    volume_flux_dgfv_kernel(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
+                            element_ids_dgfv, derivative_split, equations, volume_flux_dg,
+                            nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv;
+                            configurator_2d(volume_flux_dgfv_kernel, size_arr)...)
+
+    volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
+                                                                              element_ids_dgfv,
+                                                                              alpha,
+                                                                              derivative_split,
+                                                                              volume_flux_arr,
+                                                                              noncons_flux_arr,
+                                                                              equations)
     volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
-                              volume_flux_arr;
+                              volume_flux_arr, noncons_flux_arr, equations;
                               configurator_3d(volume_integral_dg_kernel, du)...)
 
     size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3))

diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl
@@ -191,6 +191,9 @@ function volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, fstar1_L,
 
         element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
 
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operation for better performance (to be explored)
+
         u_node = get_node_vars(u, equations, j1, j2, k)
         u_node1 = get_node_vars(u, equations, j3, j2, k)
         u_node2 = get_node_vars(u, equations, j1, j3, k)
@@ -237,7 +240,8 @@ end
 
 # Kernel for calculating DG volume integral contribution
 function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
-                                    volume_flux_arr1, volume_flux_arr2)
+                                    volume_flux_arr1, volume_flux_arr2,
+                                    equations::AbstractEquations{2})
     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
     k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
@@ -976,9 +980,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
                                                                               alpha,
                                                                               derivative_split,
                                                                               volume_flux_arr1,
-                                                                              volume_flux_arr2)
+                                                                              volume_flux_arr2,
+                                                                              equations)
     volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
-                              volume_flux_arr1, volume_flux_arr2;
+                              volume_flux_arr1, volume_flux_arr2, equations;
                               configurator_3d(volume_integral_dg_kernel, size_arr)...)
 
     size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 4))
@@ -996,6 +1001,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
     return nothing
 end
 
+# Pack kernels for calculating volume integrals
+function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::True, equations,
+                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
+    return nothing
+end
+
 # Pack kernels for prolonging solution to interfaces
 function cuda_prolong2interfaces!(u, mesh::TreeMesh{2}, equations, cache)
     neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)