Skip to content

Commit

Permalink
Add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
huiyuxie committed Sep 24, 2024
1 parent 8f5adc1 commit ef8de42
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 23 deletions.
9 changes: 6 additions & 3 deletions src/TrixiCUDA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ module TrixiCUDA
using CUDA: @cuda, CuArray, HostKernel,
threadIdx, blockIdx, blockDim, similar, launch_configuration

using Trixi: AbstractEquations, TreeMesh, DGSEM,
using Trixi: AbstractEquations, True, False,
TreeMesh,
DGSEM,
BoundaryConditionPeriodic, SemidiscretizationHyperbolic,
VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG,
flux, ntuple, nvariables,
True, False,
flux,
ntuple, nvariables,
local_leaf_cells, init_elements, init_interfaces, init_boundaries,
wrap_array, compute_coefficients, have_nonconservative_terms,
boundary_condition_periodic,
set_log_type!, set_sqrt_type!
Expand Down
2 changes: 1 addition & 1 deletion src/auxiliary/auxiliary.jl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
include("configurators.jl")
include("helpers.jl")
include("stable.jl")
13 changes: 9 additions & 4 deletions src/auxiliary/helpers.jl → src/auxiliary/stable.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
# Some helper functions and function extensions from Trixi.jl.
# Some helper functions and function extensions that are invoked by GPU
# kernels, mainly to ensure the functions themselves or other functions
# remain stable on the GPU.

# Ref: `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
# See also `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
# `DG` type is not stable on GPU
@inline function get_node_vars(u, equations, indices...)
return SVector(ntuple(@inline(v->u[v, indices...]),
Val(nvariables(equations))))
end

# Ref: `get_node_coords(x, equations, solver::DG, indices...)` in Trixi.jl
# See also `get_node_coords(x, equations, solver::DG, indices...)` in Trixi.jl
# `DG` type is not stable on GPU
@inline function get_node_coords(x, equations, indices...)
return SVector(ntuple(@inline(idx->x[idx, indices...]),
Val(ndims(equations))))
end

# Ref: `get_surface_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
# See also `get_surface_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
# `DG` type is not stable on GPU
@inline function get_surface_node_vars(u, equations, indices...)
u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]),
Val(nvariables(equations))))
Expand Down
6 changes: 4 additions & 2 deletions src/semidiscretization/semidiscretization_hyperbolic.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# This file is part of the package `Semidiscretizations.jl`.

function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver;
source_terms = nothing,
boundary_conditions = boundary_condition_periodic,
Expand All @@ -7,8 +9,8 @@ function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, sol
initial_cache = NamedTuple())
@assert ndims(mesh) == ndims(equations)

# cache = (; create_cache(mesh, equations, solver, RealT, uEltype)...,
# initial_cache...)
cache = (; create_cache_gpu(mesh, equations, solver, RealT, uEltype)...,
initial_cache...)
# _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver,
# cache)

Expand Down
55 changes: 55 additions & 0 deletions src/solvers/cache.jl
Original file line number Diff line number Diff line change
@@ -1 +1,56 @@
# Rewrite `create_cache` in Trixi.jl to add the specialized parts of the arrays
# required to copy the arrays from CPU to GPU.

# See also `create_cache` in Trixi.jl
function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltype)
# Get cells for which an element needs to be created (i.e. all leaf cells)

### ALL COPY TO GPU!!!
leaf_cell_ids = local_leaf_cells(mesh.tree)

elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype)

interfaces = init_interfaces(leaf_cell_ids, mesh, elements)

boundaries = init_boundaries(leaf_cell_ids, mesh, elements)

cache = (; elements, interfaces, boundaries)

# Add specialized parts of the cache required to compute the volume integral etc.
cache = (; cache...,
create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype)...)

return cache
end

# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental

Check warning on line 26 in src/solvers/cache.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".

Check warning on line 26 in src/solvers/cache.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".
# in Trixi.jl and it is not implemented here.

function create_cache_gpu(mesh::TreeMesh{1}, equations,
volume_integral::VolumeIntegralWeakForm, dg::DGSEM, uEltype)
NamedTuple()
end

function create_cache_gpu(mesh::TreeMesh{1}, equations,
volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, uEltype)
NamedTuple()
end

# function create_cache_gpu(mesh::TreeMesh{1}, equations,
# volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, uEltype)
# element_ids_dg = Int[]
# element_ids_dgfv = Int[]

# cache = create_cache(mesh, equations,
# VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
# dg, uEltype)

# A2dp1_x = Array{uEltype, 2}
# fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1)
# for _ in 1:Threads.nthreads()]
# fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1)
# for _ in 1:Threads.nthreads()]

# return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded,
# fstar1_R_threaded)
# end
9 changes: 6 additions & 3 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,9 @@ end
# partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU)
# and run them on the device (i.e., GPU).

# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental

Check warning on line 607 in src/solvers/dg_1d.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".

Check warning on line 607 in src/solvers/dg_1d.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".
# in Trixi.jl and it is not implemented here.

# Pack kernels for calculating volume integrals
function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms,
equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache)
Expand Down Expand Up @@ -697,7 +700,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
Expand Down Expand Up @@ -766,7 +769,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
Expand Down Expand Up @@ -1090,7 +1093,7 @@ end

# Put everything together into a single function.

# Ref: `rhs!` function in Trixi.jl
# See also `rhs!` function in Trixi.jl
function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, boundary_conditions,
source_terms::Source, dg::DGSEM, cache) where {Source}
du, u = copy_to_device!(du_cpu, u_cpu)
Expand Down
9 changes: 6 additions & 3 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,9 @@ end
# partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU)
# and run them on the device (i.e., GPU).

# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental

Check warning on line 1006 in src/solvers/dg_2d.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".

Check warning on line 1006 in src/solvers/dg_2d.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".
# in Trixi.jl and it is not implemented here.

# Pack kernels for calculating volume integrals
function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms, equations,
volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache)
Expand Down Expand Up @@ -1119,7 +1122,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
Expand Down Expand Up @@ -1202,7 +1205,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
Expand Down Expand Up @@ -1729,7 +1732,7 @@ end

# Put everything together into a single function.

# Ref: `rhs!` function in Trixi.jl
# See also `rhs!` function in Trixi.jl
function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{2}, equations, boundary_conditions,
source_terms::Source, dg::DGSEM, cache) where {Source}
du, u = copy_to_device!(du_cpu, u_cpu)
Expand Down
9 changes: 6 additions & 3 deletions src/solvers/dg_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,9 @@ end
# partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU)
# and run them on the device (i.e., GPU).

# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently exprimental

Check warning on line 1314 in src/solvers/dg_3d.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".

Check warning on line 1314 in src/solvers/dg_3d.jl

View workflow job for this annotation

GitHub Actions / Spell Check with Typos

"exprimental" should be "experimental".
# in Trixi.jl and it is not implemented here.

# Pack kernels for calculating volume integrals
function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms, equations,
volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache)
Expand Down Expand Up @@ -1445,7 +1448,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
Expand Down Expand Up @@ -1543,7 +1546,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
Expand Down Expand Up @@ -2222,7 +2225,7 @@ end

# Put everything together into a single function.

# Ref: `rhs!` function in Trixi.jl
# See also `rhs!` function in Trixi.jl
function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{3}, equations, boundary_conditions,
source_terms::Source, dg::DGSEM, cache) where {Source}
du, u = copy_to_device!(du_cpu, u_cpu)
Expand Down
4 changes: 2 additions & 2 deletions src/solvers/solvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include("dg_1d.jl")
include("dg_2d.jl")
include("dg_3d.jl")

# Ref: `rhs!` function in Trixi.jl
# See also `rhs!` function in Trixi.jl
function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t)
(; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi

Expand All @@ -16,7 +16,7 @@ function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t)
return nothing
end

# Ref: `semidiscretize` function in Trixi.jl
# See also `semidiscretize` function in Trixi.jl
function semidiscretizeGPU(semi::SemidiscretizationHyperbolic, tspan)
u0_ode = compute_coefficients(first(tspan), semi)

Expand Down
4 changes: 2 additions & 2 deletions test/test_script.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ coordinates_max = 1.0
mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 4,
n_cells_max = 30_000)

semi = TrixiCUDA.SemidiscretizationHyperbolicGPU(mesh, equations,
initial_condition_convergence_test, solver)
semi = SemidiscretizationHyperbolicGPU(mesh, equations,
initial_condition_convergence_test, solver)

0 comments on commit ef8de42

Please sign in to comment.