Skip to content

Commit

Permalink
Add kernels for solvers of equations with boundaries
Browse files Browse the repository at this point in the history
  • Loading branch information
huiyuxie committed Aug 22, 2024
1 parent 017e940 commit cf80857
Show file tree
Hide file tree
Showing 9 changed files with 567 additions and 94 deletions.
6 changes: 3 additions & 3 deletions src/TrixiGPU.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module TrixiGPU

# Include other packages that are used in TrixiGPU.jl (# FIXME: Remember to reorder)
# Include other packages that are used in TrixiGPU.jl
# using Reexport: @reexport

using CUDA: @cuda, CuArray, HostKernel,
Expand All @@ -11,8 +11,8 @@ using Trixi: AbstractEquations, TreeMesh, DGSEM,
VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing,
flux, ntuple, nvariables,
True, False,
wrap_array, compute_coefficients,
have_nonconservative_terms,
wrap_array, compute_coefficients, have_nonconservative_terms,
boundary_condition_periodic,
set_log_type, set_sqrt_type

import Trixi: get_node_vars, get_node_coords, get_surface_node_vars
Expand Down
2 changes: 1 addition & 1 deletion src/auxiliary/auxiliary.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
include("configurators.jl")
include("methods.jl")
include("helpers.jl")

# Some default settings for the package
function load_default_settings()
Expand Down
4 changes: 2 additions & 2 deletions src/auxiliary/configurators.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Kernel configurators are used for determining the number of
# threads and blocks to be used in the kernel, which optimizes the use of GPU resources.
# Kernel configurators are used for determining the number of threads and
# blocks to be used in the kernel, which optimizes the use of GPU resources.

# Kernel configurator for 1D CUDA array
function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1})
Expand Down
8 changes: 7 additions & 1 deletion src/auxiliary/methods.jl → src/auxiliary/helpers.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Extend common helper methods from Trixi.jl
# Some helper functions and function extensions from Trixi.jl.

# Ref: `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
@inline function get_node_vars(u, equations, indices...)
Expand All @@ -21,3 +21,9 @@ end

return u_ll, u_rr
end

# Callable function to replace the `boundary_condition_periodic` from Trixi.jl
@inline function boundary_condition_periodic_callable(u_inner, orientation,
direction, x, t, surface_flux, equations)
return nothing
end
26 changes: 25 additions & 1 deletion src/solvers/common.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
# Here are some common functions that are shared between the solvers.
# Some common functions that are shared between the solvers.

# Replace the `boundary_condition_periodic` from Trixi.jl with a callable one
function replace_boundary_conditions(boundary_conditions::NamedTuple)
keys_ = keys(boundary_conditions)
values_ = (func == boundary_condition_periodic ? boundary_condition_periodic_callable : func
for func in values(boundary_conditions))
return NamedTuple{keys_}(values_)
end

# Copy data from host to device
function copy_to_device!(du::PtrArray, u::PtrArray)
Expand All @@ -16,3 +24,19 @@ function copy_to_host!(du::CuArray, u::CuArray)

return (du, u)
end

# Kernel for getting last and first indices
function last_first_indices_kernel!(lasts, firsts, n_boundaries_per_direction)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

if (i <= length(n_boundaries_per_direction))
@inbounds begin
for ii in 1:i
lasts[i] += n_boundaries_per_direction[ii]
end
firsts[i] = lasts[i] - n_boundaries_per_direction[i] + 1
end
end

return nothing
end
163 changes: 143 additions & 20 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Everything related to a DG semidiscretization in 1D
# Everything related to a DG semidiscretization in 1D.

# Functions that end with `_kernel` are CUDA kernels that are going to be launched by
# the @cuda macro with parameters from the kernel configurator. They are purely run on
Expand Down Expand Up @@ -84,7 +84,7 @@ function volume_integral_kernel!(du, derivative_split, volume_flux_arr)
end

# Kernel for prolonging two interfaces
function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids, equations::AbstractEquations{1})
function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

Expand Down Expand Up @@ -122,8 +122,7 @@ function surface_flux_kernel!(surface_flux_arr, interfaces_u, equations::Abstrac
end

# Kernel for setting interface fluxes
function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_ids,
equations::AbstractEquations{1})
function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_ids)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

Expand All @@ -140,6 +139,65 @@ function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_
return nothing
end

# Kernel for prolonging two boundaries
function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_sides)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(boundaries_u, 2) && k <= size(boundaries_u, 3))
element = neighbor_ids[k]
side = neighbor_sides[k]

@inbounds begin
boundaries_u[1, j, k] = u[j, size(u, 2), element] * isequal(side, 1) # set to 0 instead of NaN
boundaries_u[2, j, k] = u[j, 1, element] * (1 - isequal(side, 1)) # set to 0 instead of NaN
end
end

return nothing
end

# Kernel for calculating boundary fluxes
function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr,
indices_arr, neighbor_ids, neighbor_sides, orientations,
boundary_conditions::NamedTuple, equations::AbstractEquations{1},
surface_flux::Any)
k = (blockIdx().x - 1) * blockDim().x + threadIdx().x

if (k <= length(boundary_arr))
boundary = boundary_arr[k]
direction = (indices_arr[1] <= boundary) + (indices_arr[2] <= boundary)

neighbor = neighbor_ids[boundary]
side = neighbor_sides[boundary]
orientation = orientations[boundary]

u_ll, u_rr = get_surface_node_vars(boundaries_u, equations, boundary)
u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr
x = get_node_coords(node_coordinates, equations, boundary)

# TODO: Optimize flow controls as they are not recommended on GPUs
if direction == 1
boundary_flux_node = boundary_conditions[1](u_inner, orientation,
direction, x, t, surface_flux, equations)
else
boundary_flux_node = boundary_conditions[2](u_inner, orientation,
direction, x, t, surface_flux, equations)
end

@inbounds begin
for ii in axes(surface_flux_values, 1)
surface_flux_values[ii, direction, neighbor] = boundary_flux_node === nothing ? # bad
surface_flux_values[ii, direction,
neighbor] :
boundary_flux_node[ii]
end
end
end

return nothing
end

# Kernel for calculating surface integrals
function surface_integral_kernel!(du, factor_arr, surface_flux_values,
equations::AbstractEquations{1})
Expand Down Expand Up @@ -226,12 +284,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
volume_flux_kernel = @cuda launch=false volume_flux_kernel!(volume_flux_arr, u, equations,
volume_flux)
volume_flux_kernel(volume_flux_arr, u, equations, volume_flux;
configurator_2d(volume_flux_kernel, size_arr)...,)
configurator_2d(volume_flux_kernel, size_arr)...)

volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split,
volume_flux_arr)
volume_integral_kernel(du, derivative_split, volume_flux_arr;
configurator_3d(volume_integral_kernel, du)...,)
configurator_3d(volume_integral_kernel, du)...)

return nothing
end
Expand All @@ -244,10 +302,9 @@ function cuda_prolong2interfaces!(u, mesh::TreeMesh{1}, equations, cache)
size_arr = CuArray{Float64}(undef, size(interfaces_u, 2), size(interfaces_u, 3))

prolong_interfaces_kernel = @cuda launch=false prolong_interfaces_kernel!(interfaces_u, u,
neighbor_ids,
equations)
prolong_interfaces_kernel(interfaces_u, u, neighbor_ids, equations;
configurator_2d(prolong_interfaces_kernel, size_arr)...,)
neighbor_ids)
prolong_interfaces_kernel(interfaces_u, u, neighbor_ids;
configurator_2d(prolong_interfaces_kernel, size_arr)...)

cache.interfaces.u = interfaces_u # copy back to host automatically

Expand All @@ -269,15 +326,15 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::False, e
surface_flux_kernel = @cuda launch=false surface_flux_kernel!(surface_flux_arr, interfaces_u,
equations, surface_flux)
surface_flux_kernel(surface_flux_arr, interfaces_u, equations, surface_flux;
configurator_1d(surface_flux_kernel, size_arr)...,)
configurator_1d(surface_flux_kernel, size_arr)...)

size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(interfaces_u, 3))

interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values,
surface_flux_arr,
neighbor_ids, equations)
interface_flux_kernel(surface_flux_values, surface_flux_arr, neighbor_ids, equations;
configurator_2d(interface_flux_kernel, size_arr)...,)
neighbor_ids)
interface_flux_kernel(surface_flux_values, surface_flux_arr, neighbor_ids;
configurator_2d(interface_flux_kernel, size_arr)...)

cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically

Expand All @@ -286,16 +343,82 @@ end

# Dummy function for asserting boundaries
function cuda_prolong2boundaries!(u, mesh::TreeMesh{1},
boundary_condition::BoundaryConditionPeriodic, cache)
boundary_condition::BoundaryConditionPeriodic, equations, cache)
@assert iszero(length(cache.boundaries.orientations))
end

# Pack kernels for prolonging solution to boundaries
function cuda_prolong2boundaries!(u, mesh::TreeMesh{1}, boundary_conditions::NamedTuple, equations,
cache)
neighbor_ids = CuArray{Int64}(cache.boundaries.neighbor_ids)
neighbor_sides = CuArray{Int64}(cache.boundaries.neighbor_sides)
boundaries_u = CuArray{Float64}(cache.boundaries.u)

size_arr = CuArray{Float64}(undef, size(boundaries_u, 2), size(boundaries_u, 3))

prolong_boundaries_kernel = @cuda launch=false prolong_boundaries_kernel!(boundaries_u, u,
neighbor_ids,
neighbor_sides)
prolong_boundaries_kernel(boundaries_u, u, neighbor_ids, neighbor_sides;
configurator_2d(prolong_boundaries_kernel, size_arr)...)

cache.boundaries.u = boundaries_u # copy back to host automatically

return nothing
end

# Dummy function for asserting boundary fluxes
function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_condition::BoundaryConditionPeriodic,
equations, dg::DGSEM, cache)
@assert iszero(length(cache.boundaries.orientations))
end

# Pack kernels for calculating boundary fluxes
function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTuple, equations,
dg::DGSEM, cache)
surface_flux = dg.surface_integral.surface_flux

n_boundaries_per_direction = CuArray{Int64}(cache.boundaries.n_boundaries_per_direction)
neighbor_ids = CuArray{Int64}(cache.boundaries.neighbor_ids)
neighbor_sides = CuArray{Int64}(cache.boundaries.neighbor_sides)
orientations = CuArray{Int64}(cache.boundaries.orientations)
boundaries_u = CuArray{Float64}(cache.boundaries.u)
node_coordinates = CuArray{Float64}(cache.boundaries.node_coordinates)
surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)

lasts = zero(n_boundaries_per_direction)
firsts = zero(n_boundaries_per_direction)

last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts,
n_boundaries_per_direction)
last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction;
configurator_1d(last_first_indices_kernel, lasts)...)

lasts, firsts = Array(lasts), Array(firsts)
boundary_arr = CuArray{Int64}(firsts[1]:lasts[2])
indices_arr = CuArray{Int64}([firsts[1], firsts[2]])
boundary_conditions_callable = replace_boundary_conditions(boundary_conditions)

size_arr = CuArray{Float64}(undef, length(boundary_arr))

boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values,
boundaries_u, node_coordinates,
t, boundary_arr, indices_arr,
neighbor_ids, neighbor_sides,
orientations,
boundary_conditions_callable,
equations,
surface_flux)
boundary_flux_kernel(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr,
indices_arr, neighbor_ids, neighbor_sides, orientations,
boundary_conditions_callable, equations, surface_flux;
configurator_1d(boundary_flux_kernel, size_arr)...)

cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically

return nothing
end

# Pack kernels for calculating surface integrals
function cuda_surface_integral!(du, mesh::TreeMesh{1}, equations, dg::DGSEM, cache)
# FIXME: Check `surface_integral`
Expand All @@ -311,7 +434,7 @@ function cuda_surface_integral!(du, mesh::TreeMesh{1}, equations, dg::DGSEM, cac
surface_flux_values,
equations)
surface_integral_kernel(du, factor_arr, surface_flux_values, equations;
configurator_3d(surface_integral_kernel, size_arr)...,)
configurator_3d(surface_integral_kernel, size_arr)...)

return nothing
end
Expand All @@ -333,14 +456,14 @@ end

# Pack kernels for calculating source terms
function cuda_sources!(du, u, t, source_terms, equations::AbstractEquations{1}, cache)
node_coordinates = CuArray{Float32}(cache.elements.node_coordinates)
node_coordinates = CuArray{Float64}(cache.elements.node_coordinates)

size_arr = CuArray{Float32}(undef, size(du, 2), size(du, 3))
size_arr = CuArray{Float64}(undef, size(du, 2), size(du, 3))

source_terms_kernel = @cuda launch=false source_terms_kernel!(du, u, node_coordinates, t,
equations, source_terms)
source_terms_kernel(du, u, node_coordinates, t, equations, source_terms;
configurator_2d(source_terms_kernel, size_arr)...,)
configurator_2d(source_terms_kernel, size_arr)...)

return nothing
end
Expand All @@ -359,7 +482,7 @@ function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, initial_condit

cuda_interface_flux!(mesh, have_nonconservative_terms(equations), equations, dg, cache)

cuda_prolong2boundaries!(u, mesh, boundary_conditions, cache)
cuda_prolong2boundaries!(u, mesh, boundary_conditions, equations, cache)

cuda_boundary_flux!(t, mesh, boundary_conditions, equations, dg, cache)

Expand Down
Loading

0 comments on commit cf80857

Please sign in to comment.