Skip to content

Commit

Permalink
Merge pull request #24 from czha/mortar-flux
Browse files Browse the repository at this point in the history
Add mortar flux kernel with `nonconservative_terms::False` to 2D and 3D
  • Loading branch information
huiyuxie authored Aug 30, 2024
2 parents 0fb397d + 8e31fdd commit a71b863
Show file tree
Hide file tree
Showing 13 changed files with 693 additions and 65 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ Our current focus is on the semidiscretization of PDEs. The table below shows th
# GPU Kernels to be Implemented
Kernels left to be implemented on `TreeMesh` with `DGSEM`:
- 1D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`
- 2D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!`
- 3D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!`
- 2D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!`- `nonconservative_terms::True`
- 3D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!` - `nonconservative_terms::True`

# Show Your Support!
We always welcome new people to join us, please feel free to contribute. Also, if you find this package interesting and inspiring, please give it a star. Thanks!
58 changes: 58 additions & 0 deletions examples/advection_mortar_2d.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
using Trixi, TrixiGPU
using OrdinaryDiffEq

# The example is taken from the Trixi.jl

###############################################################################
# semidiscretization of the linear advection equation

advection_velocity = (0.2, -0.7)
equations = LinearScalarAdvectionEquation2D(advection_velocity)

initial_condition = initial_condition_convergence_test
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)

coordinates_min = (-1.0, -1.0)
coordinates_max = (1.0, 1.0)
refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0),
coordinates_max = (1.0, 1.0)),)
mesh = TreeMesh(coordinates_min, coordinates_max,
initial_refinement_level = 2,
refinement_patches = refinement_patches,
n_cells_max = 10_000)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)

###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0, 1.0)
ode = semidiscretize_gpu(semi, tspan) # from TrixiGPU.jl

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
extra_analysis_integrals = (entropy,))

alive_callback = AliveCallback(analysis_interval = analysis_interval)

save_solution = SaveSolutionCallback(interval = 100,
save_initial_solution = true,
save_final_solution = true,
solution_variables = cons2prim)

stepsize_callback = StepsizeCallback(cfl = 1.6)

callbacks = CallbackSet(summary_callback,
analysis_callback, alive_callback,
save_solution,
stepsize_callback)

###############################################################################
# run the simulation

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
save_everystep = false, callback = callbacks);
summary_callback() # print the timer summary
62 changes: 62 additions & 0 deletions examples/advection_mortar_3d.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@

using Trixi, TrixiGPU
using OrdinaryDiffEq

# The example is taken from the Trixi.jl

###############################################################################
# semidiscretization of the linear advection equation

advection_velocity = (0.2, -0.7, 0.5)
equations = LinearScalarAdvectionEquation3D(advection_velocity)

initial_condition = initial_condition_convergence_test
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)

coordinates_min = (-1.0, -1.0, -1.0)
coordinates_max = (1.0, 1.0, 1.0)
refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0, -1.0),
coordinates_max = (1.0, 1.0, 1.0)),
(type = "box", coordinates_min = (0.0, -0.5, -0.5),
coordinates_max = (0.5, 0.5, 0.5)))
mesh = TreeMesh(coordinates_min, coordinates_max,
initial_refinement_level = 2,
refinement_patches = refinement_patches,
n_cells_max = 10_000)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)

###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0, 5.0)
ode = semidiscretize_gpu(semi, tspan) # from TrixiGPU.jl

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
extra_analysis_integrals = (entropy,))

alive_callback = AliveCallback(analysis_interval = analysis_interval)

save_solution = SaveSolutionCallback(interval = 100,
save_initial_solution = true,
save_final_solution = true,
solution_variables = cons2prim)

stepsize_callback = StepsizeCallback(cfl = 1.2)

callbacks = CallbackSet(summary_callback,
analysis_callback,
alive_callback,
save_solution,
stepsize_callback)

###############################################################################
# run the simulation

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
save_everystep = false, callback = callbacks);
summary_callback() # print the timer summary
4 changes: 2 additions & 2 deletions src/auxiliary/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ end
# Helper function for checking `cache.mortars`
@inline function check_cache_mortars(cache)
if iszero(length(cache.mortars.orientations))
return True()
else
return False()
else
return True()
end
end

Expand Down
176 changes: 158 additions & 18 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -442,29 +442,29 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper,
u2 = size(u, 2)

@inbounds begin
for ii in axes(forward_upper, 2)
u_upper[leftright, i, j, k] += forward_upper[j, ii] *
for jj in axes(forward_upper, 2)
u_upper[leftright, i, j, k] += forward_upper[j, jj] *
u[i,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * ii,
isequal(orientation, 1) * ii + isequal(orientation, 2) * u2,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * u2,
large_element] * isequal(large_side, 1)
u_lower[leftright, i, j, k] += forward_lower[j, ii] *
u_lower[leftright, i, j, k] += forward_lower[j, jj] *
u[i,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * ii,
isequal(orientation, 1) * ii + isequal(orientation, 2) * u2,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * u2,
large_element] * isequal(large_side, 1)
end

for ii in axes(forward_lower, 2)
u_upper[leftright, i, j, k] += forward_upper[j, ii] *
for jj in axes(forward_lower, 2)
u_upper[leftright, i, j, k] += forward_upper[j, jj] *
u[i,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * ii,
isequal(orientation, 1) * ii + isequal(orientation, 2) * 1,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * 1,
large_element] * isequal(large_side, 2)
u_lower[leftright, i, j, k] += forward_lower[j, ii] *
u_lower[leftright, i, j, k] += forward_lower[j, jj] *
u[i,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * ii,
isequal(orientation, 1) * ii + isequal(orientation, 2) * 1,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * 1,
large_element] * isequal(large_side, 2)
end
end
Expand All @@ -473,6 +473,82 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper,
return nothing
end

# Kernel for calculating mortar fluxes
function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations, equations,
surface_flux::Any)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(u_upper, 3) && k <= length(orientations))
u_ll_upper, u_rr_upper = get_surface_node_vars(u_upper, equations, j, k)
u_ll_lower, u_rr_lower = get_surface_node_vars(u_lower, equations, j, k)
orientation = orientations[k]

flux_upper_node = surface_flux(u_ll_upper, u_rr_upper, orientation, equations)
flux_lower_node = surface_flux(u_ll_lower, u_rr_lower, orientation, equations)

@inbounds begin
for i in axes(fstar_upper, 1)
fstar_upper[i, j, k] = flux_upper_node[i]
fstar_lower[i, j, k] = flux_lower_node[i]
end
end
end

return nothing
end

# Kernel for copying mortar fluxes small to small and small to large
function mortar_flux_copy_to_kernel!(surface_flux_values, tmp_surface_flux_values, fstar_upper,
fstar_lower, reverse_upper, reverse_lower, neighbor_ids,
large_sides, orientations)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

if (i <= size(surface_flux_values, 1) && j <= size(surface_flux_values, 2) &&
k <= length(orientations))
large_element = neighbor_ids[3, k]
upper_element = neighbor_ids[2, k]
lower_element = neighbor_ids[1, k]

large_side = large_sides[k]
orientation = orientations[k]

# Use math expression to enhance performance (against control flow), it is equivalent to,
# `(2 - large_side) * (2 - orientation) * 1 +
# (2 - large_side) * (orientation - 1) * 3 +
# (large_side - 1) * (2 - orientation) * 2 +
# (large_side - 1) * (orientation - 1) * 4`.
direction = large_side + 2 * orientation - 2

surface_flux_values[i, j, direction, upper_element] = fstar_upper[i, j, k]
surface_flux_values[i, j, direction, lower_element] = fstar_lower[i, j, k]

# Use math expression to enhance performance (against control flow), it is equivalent to,
# `(2 - large_side) * (2 - orientation) * 2 +
# (2 - large_side) * (orientation - 1) * 4 +
# (large_side - 1) * (2 - orientation) * 1 +
# (large_side - 1) * (orientation - 1) * 3`.
direction = 2 * orientation - large_side + 1

@inbounds begin
for ii in axes(reverse_upper, 2) # i.e., ` for ii in axes(reverse_lower, 2)`
tmp_surface_flux_values[i, j, direction, large_element] += reverse_upper[j, ii] *
fstar_upper[i, ii, k] +
reverse_lower[j, ii] *
fstar_lower[i, ii, k]
end

surface_flux_values[i, j, direction, large_element] = tmp_surface_flux_values[i, j,
direction,
large_element]
end
end

return nothing
end

# Kernel for calculating surface integrals
function surface_integral_kernel!(du, factor_arr, surface_flux_values,
equations::AbstractEquations{2})
Expand Down Expand Up @@ -825,17 +901,18 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{2}, boundary_conditions::NamedTup
end

# Dummy function for asserting mortars
function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::True, dg::DGSEM, cache)
function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::False, dg::DGSEM, cache)
@assert iszero(length(cache.mortars.orientations))
end

# Pack kernels for prolonging solution to mortars
function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::False, dg::DGSEM, cache)
function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::True, dg::DGSEM, cache)
neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids)
large_sides = CuArray{Int64}(cache.mortars.large_sides)
orientations = CuArray{Int64}(cache.mortars.orientations)
u_upper = zero(CuArray{Float64}(cache.mortars.u_upper))
u_lower = zero(CuArray{Float64}(cache.mortars.u_lower))

u_upper = zero(CuArray{Float64}(cache.mortars.u_upper)) # NaN to zero
u_lower = zero(CuArray{Float64}(cache.mortars.u_lower)) # NaN to zero

forward_upper = CuArray{Float64}(dg.mortar.forward_upper)
forward_lower = CuArray{Float64}(dg.mortar.forward_lower)
Expand Down Expand Up @@ -871,6 +948,66 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::False, dg::D
return nothing
end

# Dummy function for asserting mortar fluxes
function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::False, nonconservative_terms,
equations, dg::DGSEM, cache)
@assert iszero(length(cache.mortars.orientations))
end

# Pack kernels for calculating mortar fluxes
function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservative_terms::False,
equations, dg::DGSEM, cache)
surface_flux = dg.surface_integral.surface_flux

neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids)
large_sides = CuArray{Int64}(cache.mortars.large_sides)
orientations = CuArray{Int64}(cache.mortars.orientations)

u_upper = CuArray{Float64}(cache.mortars.u_upper)
u_lower = CuArray{Float64}(cache.mortars.u_lower)
reverse_upper = CuArray{Float64}(dg.mortar.reverse_upper)
reverse_lower = CuArray{Float64}(dg.mortar.reverse_lower)

surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)
tmp_surface_flux_values = zero(similar(surface_flux_values))

fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations))
fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations))

size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations))

mortar_flux_kernel = @cuda launch=false mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper,
u_lower, orientations, equations,
surface_flux)
mortar_flux_kernel(fstar_upper, fstar_lower, u_upper, u_lower, orientations, equations,
surface_flux;
configurator_2d(mortar_flux_kernel, size_arr)...)

size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(surface_flux_values, 2),
length(orientations))

mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values,
tmp_surface_flux_values,
fstar_upper,
fstar_lower,
reverse_upper,
reverse_lower,
neighbor_ids,
large_sides,
orientations)
mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, fstar_upper,
fstar_lower,
reverse_upper, reverse_lower, neighbor_ids, large_sides,
orientations;
configurator_3d(mortar_flux_copy_to_kernel, size_arr)...)

cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically
end

function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservative_terms::True,
equations, dg::DGSEM, cache)
end

# Pack kernels for calculating surface integrals
function cuda_surface_integral!(du, mesh::TreeMesh{2}, equations, dg::DGSEM, cache)
factor_arr = CuArray{Float64}([
Expand Down Expand Up @@ -941,6 +1078,9 @@ function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{2}, equations, initial_condit

cuda_prolong2mortars!(u, mesh, check_cache_mortars(cache), dg, cache)

cuda_mortar_flux!(mesh, check_cache_mortars(cache), have_nonconservative_terms(equations),
equations, dg, cache)

cuda_surface_integral!(du, mesh, equations, dg, cache)

cuda_jacobian!(du, mesh, equations, cache)
Expand Down
Loading

0 comments on commit a71b863

Please sign in to comment.