Check comments and add more

trixi-gpu · Aug 9, 2024 · 326921f · 326921f
1 parent 696c703
commit 326921f
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 25 deletions.
diff --git a/src/auxiliary/configurators.jl b/src/auxiliary/configurators.jl
@@ -1,4 +1,5 @@
-# Kernel configurators
+# Kernel configurators determine the number of threads and blocks
+# used to launch a kernel, which optimizes the use of GPU resources.
 
 # Kernel configurator for 1D CUDA array
 function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1})

diff --git a/src/auxiliary/methods.jl b/src/auxiliary/methods.jl
@@ -1,13 +1,16 @@
 # Extend common helper methods from Trixi.jl
 
+# Ref: `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
 @inline function get_node_vars(u, equations, indices...)
     SVector(ntuple(@inline(v->u[v, indices...]), Val(nvariables(equations))))
 end
 
+# Ref: `get_node_coords(x, equations, solver::DG, indices...)` in Trixi.jl
 @inline function get_node_coords(x, equations, indices...)
     SVector(ntuple(@inline(idx->x[idx, indices...]), Val(ndims(equations))))
 end
 
+# Ref: `get_surface_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
 @inline function get_surface_node_vars(u, equations, indices...)
     u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(nvariables(equations))))
     u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), Val(nvariables(equations))))

diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
@@ -1,5 +1,8 @@
-# Solver functions for 1D DG methods
+# Everything related to a DG semidiscretization in 1D
 
+# Functions ending with `_kernel` are CUDA kernels that are launched by the `@cuda` macro.
+
+# Kernel for calculating flux along normal direction
 function flux_kernel!(flux_arr, u, flux::Function, equations::AbstractEquations{1})
     j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
@@ -19,6 +22,7 @@ function flux_kernel!(flux_arr, u, flux::Function, equations::AbstractEquations{
     return nothing
 end
 
+# Kernel for calculating the weak form
 function weak_form_kernel!(du, derivative_dhat, flux_arr, equations::AbstractEquations{1})
     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
@@ -35,30 +39,10 @@ function weak_form_kernel!(du, derivative_dhat, flux_arr, equations::AbstractEqu
     return nothing
 end
 
-function volume_flux_kernel!(volume_flux_arr, u, volume_flux::Function,
-                             equations::AbstractEquations{1})
-    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
-
-    if (j <= size(u, 2)^2 && k <= size(u, 3))
-        j1 = div(j - 1, size(u, 2)) + 1
-        j2 = rem(j - 1, size(u, 2)) + 1
-
-        u_node = get_node_vars(u, equations, j1, k)
-        u_node1 = get_node_vars(u, equations, j2, k)
-
-        volume_flux_node = volume_flux(u_node, u_node1, 1, equations)
-
-        @inbounds begin
-            for ii in axes(u, 1)
-                volume_flux_arr[ii, j1, j2, k] = volume_flux_node[ii]
-            end
-        end
-    end
-
-    return nothing
-end
+# Functions beginning with `cuda_` pack CUDA kernels together,
+# calling them from the host (i.e., CPU) and running them on the device (i.e., GPU).
 
+# Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms, equations,
                                volume_integral::VolumeIntegralWeakForm, dg::DGSEM)
     derivative_dhat = CuArray{Float32}(dg.basis.derivative_dhat)