Add comments

trixi-gpu · Sep 24, 2024 · ef8de42 · ef8de42
1 parent 8f5adc1
commit ef8de42
Show file tree

Hide file tree

Showing 10 changed files with 97 additions and 23 deletions.
diff --git a/src/TrixiCUDA.jl b/src/TrixiCUDA.jl
@@ -6,11 +6,14 @@ module TrixiCUDA
 using CUDA: @cuda, CuArray, HostKernel,
             threadIdx, blockIdx, blockDim, similar, launch_configuration
 
-using Trixi: AbstractEquations, TreeMesh, DGSEM,
+using Trixi: AbstractEquations, True, False,
+             TreeMesh,
+             DGSEM,
              BoundaryConditionPeriodic, SemidiscretizationHyperbolic,
              VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG,
-             flux, ntuple, nvariables,
-             True, False,
+             flux,
+             ntuple, nvariables,
+             local_leaf_cells, init_elements, init_interfaces, init_boundaries,
              wrap_array, compute_coefficients, have_nonconservative_terms,
              boundary_condition_periodic,
              set_log_type!, set_sqrt_type!

diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
@@ -1,2 +1,2 @@
 include("configurators.jl")
-include("helpers.jl")
+include("stable.jl")
diff --git a/src/auxiliary/helpers.jl → src/auxiliary/stable.jl b/src/auxiliary/helpers.jl → src/auxiliary/stable.jl
@@ -1,18 +1,23 @@
-# Some helper functions and function extensions from Trixi.jl.
+# Some helper functions and function extensions that are invoked by GPU 
+# kernels, mainly to ensure the functions themselves or other functions 
+# remain stable on the GPU.
 
-# Ref: `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
+# See also `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
+# `DG` type is not stable on GPU
 @inline function get_node_vars(u, equations, indices...)
     return SVector(ntuple(@inline(v->u[v, indices...]),
                           Val(nvariables(equations))))
 end
 
-# Ref: `get_node_coords(x, equations, solver::DG, indices...)` in Trixi.jl
+# See also `get_node_coords(x, equations, solver::DG, indices...)` in Trixi.jl
+# `DG` type is not stable on GPU
 @inline function get_node_coords(x, equations, indices...)
     return SVector(ntuple(@inline(idx->x[idx, indices...]),
                           Val(ndims(equations))))
 end
 
-# Ref: `get_surface_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
+# See also `get_surface_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
+# `DG` type is not stable on GPU
 @inline function get_surface_node_vars(u, equations, indices...)
     u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]),
                           Val(nvariables(equations))))

diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl
@@ -1,3 +1,5 @@
+# This file is part of the package `Semidiscretizations.jl`.
+
 function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver;
                                          source_terms = nothing,
                                          boundary_conditions = boundary_condition_periodic,
@@ -7,8 +9,8 @@ function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, sol
                                          initial_cache = NamedTuple())
     @assert ndims(mesh) == ndims(equations)
 
-    # cache = (; create_cache(mesh, equations, solver, RealT, uEltype)...,
-    #          initial_cache...)
+    cache = (; create_cache_gpu(mesh, equations, solver, RealT, uEltype)...,
+             initial_cache...)
     # _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver,
     #                                                   cache)
 

diff --git a/src/solvers/cache.jl b/src/solvers/cache.jl
@@ -1 +1,56 @@
+# Rewrite `create_cache` in Trixi.jl to add the specialized parts of the arrays 
+# required to copy the arrays from CPU to GPU.
 
+# See also `create_cache` in Trixi.jl
+function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltype)
+    # Get cells for which an element needs to be created (i.e. all leaf cells)
+
+    ### ALL COPY TO GPU!!!
+    leaf_cell_ids = local_leaf_cells(mesh.tree)
+
+    elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype)
+
+    interfaces = init_interfaces(leaf_cell_ids, mesh, elements)
+
+    boundaries = init_boundaries(leaf_cell_ids, mesh, elements)
+
+    cache = (; elements, interfaces, boundaries)
+
+    # Add specialized parts of the cache required to compute the volume integral etc.
+    cache = (; cache...,
+             create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype)...)
+
+    return cache
+end
+
+# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental
+# in Trixi.jl and it is not implemented here.
+
+function create_cache_gpu(mesh::TreeMesh{1}, equations,
+                          volume_integral::VolumeIntegralWeakForm, dg::DGSEM, uEltype)
+    NamedTuple()
+end
+
+function create_cache_gpu(mesh::TreeMesh{1}, equations,
+                          volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, uEltype)
+    NamedTuple()
+end
+
+# function create_cache_gpu(mesh::TreeMesh{1}, equations,
+#                       volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, uEltype)
+#     element_ids_dg = Int[]
+#     element_ids_dgfv = Int[]
+
+#     cache = create_cache(mesh, equations,
+#                          VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
+#                          dg, uEltype)
+
+#     A2dp1_x = Array{uEltype, 2}
+#     fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1)
+#                                 for _ in 1:Threads.nthreads()]
+#     fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1)
+#                                 for _ in 1:Threads.nthreads()]
+
+#     return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded,
+#             fstar1_R_threaded)
+# end
diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
@@ -604,6 +604,9 @@ end
 # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) 
 # and run them on the device (i.e., GPU).
 
+# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental
+# in Trixi.jl and it is not implemented here.
+
 # Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms,
                                equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache)
@@ -697,7 +700,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
 
     # For `Float64`, this gives 1.8189894035458565e-12
     # For `Float32`, this gives 1.1920929f-5
-    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+    atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl
 
     element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
     element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
@@ -766,7 +769,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
 
     # For `Float64`, this gives 1.8189894035458565e-12
     # For `Float32`, this gives 1.1920929f-5
-    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+    atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl
 
     element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
     element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
@@ -1090,7 +1093,7 @@ end
 
 # Put everything together into a single function.
 
-# Ref: `rhs!` function in Trixi.jl
+# See also `rhs!` function in Trixi.jl
 function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, boundary_conditions,
                   source_terms::Source, dg::DGSEM, cache) where {Source}
     du, u = copy_to_device!(du_cpu, u_cpu)

diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl
@@ -1003,6 +1003,9 @@ end
 # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) 
 # and run them on the device (i.e., GPU).
 
+# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental
+# in Trixi.jl and it is not implemented here.
+
 # Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms, equations,
                                volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache)
@@ -1119,7 +1122,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
 
     # For `Float64`, this gives 1.8189894035458565e-12
     # For `Float32`, this gives 1.1920929f-5
-    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+    atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl
 
     element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
     element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
@@ -1202,7 +1205,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
 
     # For `Float64`, this gives 1.8189894035458565e-12
     # For `Float32`, this gives 1.1920929f-5
-    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+    atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl
 
     element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
     element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
@@ -1729,7 +1732,7 @@ end
 
 # Put everything together into a single function.
 
-# Ref: `rhs!` function in Trixi.jl
+# See also `rhs!` function in Trixi.jl
 function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{2}, equations, boundary_conditions,
                   source_terms::Source, dg::DGSEM, cache) where {Source}
     du, u = copy_to_device!(du_cpu, u_cpu)

diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl
@@ -1311,6 +1311,9 @@ end
 # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) 
 # and run them on the device (i.e., GPU).
 
+# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental
+# in Trixi.jl and it is not implemented here.
+
 # Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms, equations,
                                volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache)
@@ -1445,7 +1448,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
 
     # For `Float64`, this gives 1.8189894035458565e-12
     # For `Float32`, this gives 1.1920929f-5
-    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+    atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl
 
     element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
     element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
@@ -1543,7 +1546,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
 
     # For `Float64`, this gives 1.8189894035458565e-12
     # For `Float32`, this gives 1.1920929f-5
-    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+    atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl
 
     element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
     element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
@@ -2222,7 +2225,7 @@ end
 
 # Put everything together into a single function.
 
-# Ref: `rhs!` function in Trixi.jl
+# See also `rhs!` function in Trixi.jl
 function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{3}, equations, boundary_conditions,
                   source_terms::Source, dg::DGSEM, cache) where {Source}
     du, u = copy_to_device!(du_cpu, u_cpu)

diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl
@@ -4,7 +4,7 @@ include("dg_1d.jl")
 include("dg_2d.jl")
 include("dg_3d.jl")
 
-# Ref: `rhs!` function in Trixi.jl
+# See also `rhs!` function in Trixi.jl
 function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t)
     (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi
 
@@ -16,7 +16,7 @@ function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t)
     return nothing
 end
 
-# Ref: `semidiscretize` function in Trixi.jl
+# See also `semidiscretize` function in Trixi.jl
 function semidiscretizeGPU(semi::SemidiscretizationHyperbolic, tspan)
     u0_ode = compute_coefficients(first(tspan), semi)
 

diff --git a/test/test_script.jl b/test/test_script.jl
@@ -11,5 +11,5 @@ coordinates_max = 1.0
 mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 4,
                 n_cells_max = 30_000)
 
-semi = TrixiCUDA.SemidiscretizationHyperbolicGPU(mesh, equations,
-                                                 initial_condition_convergence_test, solver)
+semi = SemidiscretizationHyperbolicGPU(mesh, equations,
+                                       initial_condition_convergence_test, solver)