MPI implementation for non-conservative equations using `ParallelP4estMesh{3}` (trixi-framework#2126)

* Added first working version of MPI with non-conservative systems for CONFORMING meshes (analysis routines need work)

* Added a first version of calc_mpi_mortar_flux! for non-conservative equations (not working yet)

* Added non-unique mortar fluxes to be able to handle non-conservative equations

* format

* Updated p4est 3d tests

* Updated parabolic and parallel p4est 3d implementations for new naming convention

* format

* Adapted MPI mortars to new mortar fluxes fstar_primary and fstar_secondary

* Fixed problem with parabolic non-conforming discretization

* One solution for the MHD analysis problem with MPI

* Added parallel MHD test

* Selected the right type for initialization of the integral in integrate_via_indices

* Added minimal test to reproduce issues with nranks > ntrees

* Fixed GLM speed callback for parallel meshes

* Fixed the computation of :linf_divb for parallel meshes and improved reduction operations for GLM speed

* Added non-conforming non-conservative parallel test with 1 tree to test nranks > ntrees

* Removed unused MHD conforming test

* Apply suggestions from code review

Change the occurrences of `Float64` literals to `Float32`-compatible literals

* Format

* Apply suggestions from code review

Co-authored-by: Hendrik Ranocha <[email protected]>

---------

Co-authored-by: Hendrik Ranocha <[email protected]>
amrueda and ranocha authored Nov 20, 2024
1 parent db83c71 commit 21a5a8d
Showing 5 changed files with 188 additions and 19 deletions.
10 changes: 10 additions & 0 deletions src/callbacks_step/analysis_dg3d.jl
@@ -376,6 +376,11 @@ function analyze(::Val{:linf_divb}, du, u, t,
end
end

if mpi_isparallel()
# Base.max instead of max needed, see comment in src/auxiliary/math.jl
linf_divb = MPI.Allreduce!(Ref(linf_divb), Base.max, mpi_comm())[]
end

return linf_divb
end

@@ -412,6 +417,11 @@ function analyze(::Val{:linf_divb}, du, u, t,
end
end

if mpi_isparallel()
# Base.max instead of max needed, see comment in src/auxiliary/math.jl
linf_divb = MPI.Allreduce!(Ref(linf_divb), Base.max, mpi_comm())[]
end

return linf_divb
end
end # @muladd
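The reduction added here follows the standard MPI.jl in-place allreduce pattern on a scalar wrapped in a `Ref`. A minimal, self-contained sketch of that pattern (outside Trixi, where `max` and `Base.max` coincide; Trixi spells out `Base.max` because `max` has custom methods, see `src/auxiliary/math.jl`):

```julia
using MPI

MPI.Init()
comm = MPI.COMM_WORLD

# Stand-in for the rank-local maximum of |div B| computed above
linf_divb_local = rand()

# Wrap the scalar in a `Ref` so `MPI.Allreduce!` can update it in place;
# afterwards every rank holds the global maximum
buf = Ref(linf_divb_local)
MPI.Allreduce!(buf, Base.max, comm)
linf_divb_global = buf[]

println("rank $(MPI.Comm_rank(comm)): linf_divb = $linf_divb_global")

MPI.Finalize()
```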
10 changes: 6 additions & 4 deletions src/callbacks_step/analysis_dg3d_parallel.jl
@@ -72,10 +72,12 @@ function integrate_via_indices(func::Func, u,
@unpack weights = dg.basis

# Initialize integral with zeros of the right shape
# Pass `zero(SVector{nvariables(equations), eltype(u)})` to `func` since `u` might be empty, if the
# current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096.
integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, 1,
equations, dg, args...))
# Pass `zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg), 1)`
# to `func` since `u` might be empty, if the current rank has no elements.
# See also https://github.com/trixi-framework/Trixi.jl/issues/1096, and
# https://github.com/trixi-framework/Trixi.jl/pull/2126/files/7cbc57cfcba93e67353566e10fce1f3edac27330#r1814483243.
integral = zero(func(zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg),
nnodes(dg), 1), 1, 1, 1, 1, equations, dg, args...))
volume = zero(real(mesh))

# Use quadrature to numerically integrate over entire domain
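The switch from a zero `SVector` to a zero array lets `func` index its first argument like the full solution array `u` (variable, node, and element indices), something a zero `SVector` of variables cannot provide. A rough standalone sketch with invented sizes (`nvars` and `n_nodes` are placeholders, not Trixi API):

```julia
# Hypothetical sizes: 9 variables (e.g. 3D GLM-MHD) and 4 nodes per direction
nvars, n_nodes = 9, 4

# Dummy solution array with a single element, mirroring
# `zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg), 1)`
u_dummy = zeros(Float64, nvars, n_nodes, n_nodes, n_nodes, 1)

# A typical integrand accesses `u` node-wise; this works on the dummy array,
# but not on `zero(SVector{nvars, Float64})`
integrand(u, i, j, k, element) = u[1, i, j, k, element]^2

# Determine the type of the integral even if this rank owns no elements
integral = zero(integrand(u_dummy, 1, 1, 1, 1))   # -> 0.0
```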
14 changes: 14 additions & 0 deletions src/callbacks_step/glm_speed_dg.jl
@@ -13,6 +13,14 @@ function calc_dt_for_cleaning_speed(cfl::Real, mesh,
# Cartesian meshes
max_scaled_speed_for_c_h = maximum(cache.elements.inverse_jacobian) *
ndims(equations)

if mpi_isparallel()
# Base.max instead of max needed, see comment in src/auxiliary/math.jl
max_scaled_speed_for_c_h = MPI.Allreduce!(Ref(max_scaled_speed_for_c_h),
Base.max,
mpi_comm())[]
end

# OBS! This depends on the implementation details of the StepsizeCallback and needs to be adapted
# as well if that callback changes.
return cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h)
@@ -29,6 +37,12 @@ function calc_dt_for_cleaning_speed(cfl::Real, mesh,
# Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations.
max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations)

if mpi_isparallel()
# Base.max instead of max needed, see comment in src/auxiliary/math.jl
max_scaled_speed_for_c_h = MPI.Allreduce!(Ref(max_scaled_speed_for_c_h),
Base.max,
mpi_comm())[]
end
# This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by
# `polydeg+1`. This is because `nnodes(dg)` returns the total number of
# multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns
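With the global reduction in place, `max_scaled_speed_for_c_h` is the maximum over all ranks before it enters the time-step formula `cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h)`. A small numeric sketch with invented values:

```julia
# Invented values, for illustration only
cfl = 0.5
ndims_equations = 3       # 3D
n_nodes = 4               # nnodes(dg), i.e. polydeg + 1 for DGSEM
inv_jacobian_max = 8.0    # global maximum of `inverse_jacobian` after MPI.Allreduce!

max_scaled_speed_for_c_h = inv_jacobian_max * ndims_equations   # 24.0
dt_cleaning = cfl * 2 / (n_nodes * max_scaled_speed_for_c_h)    # ≈ 0.0104
```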
99 changes: 84 additions & 15 deletions src/solvers/dgsem_p4est/dg_3d_parallel.jl
@@ -267,6 +267,40 @@ end
end
end

# Inlined version of the interface flux computation for non-conservative equations
@inline function calc_mpi_interface_flux!(surface_flux_values,
mesh::Union{ParallelP4estMesh{3},
ParallelT8codeMesh{3}},
nonconservative_terms::True, equations,
surface_integral, dg::DG, cache,
interface_index, normal_direction,
interface_i_node_index,
interface_j_node_index, local_side,
surface_i_node_index, surface_j_node_index,
local_direction_index, local_element_index)
@unpack u = cache.mpi_interfaces
surface_flux, nonconservative_flux = surface_integral.surface_flux

u_ll, u_rr = get_surface_node_vars(u, equations, dg,
interface_i_node_index, interface_j_node_index,
interface_index)

# Compute flux and non-conservative term for this side of the interface
if local_side == 1
flux_ = surface_flux(u_ll, u_rr, normal_direction, equations)
noncons_flux_ = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
else # local_side == 2
flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations)
noncons_flux_ = -nonconservative_flux(u_rr, u_ll, -normal_direction, equations)
end

for v in eachvariable(equations)
surface_flux_values[v, surface_i_node_index, surface_j_node_index,
local_direction_index, local_element_index] = flux_[v] +
0.5f0 * noncons_flux_[v]
end
end
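Note the asymmetry handled in the `local_side == 2` branch: the conservative flux keeps the state ordering and only flips the normal and the sign, whereas the non-conservative term additionally swaps the states, since it is generally not symmetric in its arguments, so each side receives its own contribution (added with the factor 1/2 above). A toy scalar illustration, not Trixi code:

```julia
# Toy, non-symmetric "non-conservative" term (illustration only)
phi(u_ll, u_rr, n) = u_ll * (u_rr - u_ll) * n

u_L, u_R, n = 1.0, 3.0, 1.0

phi(u_L, u_R, n)      #  2.0 -> non-conservative contribution on the primary side
-phi(u_R, u_L, -n)    # -6.0 -> non-conservative contribution on the secondary side
```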

function prolong2mpimortars!(cache, u,
mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}},
equations,
@@ -384,12 +418,13 @@ function calc_mpi_mortar_flux!(surface_flux_values,
surface_integral, dg::DG, cache)
@unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
@unpack contravariant_vectors = cache.elements
@unpack fstar_primary_threaded, fstar_tmp_threaded = cache
@unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache
index_range = eachnode(dg)

@threaded for mortar in eachmpimortar(dg, cache)
# Choose thread-specific pre-allocated container
fstar = fstar_primary_threaded[Threads.threadid()]
fstar_primary = fstar_primary_threaded[Threads.threadid()]
fstar_secondary = fstar_secondary_threaded[Threads.threadid()]
fstar_tmp = fstar_tmp_threaded[Threads.threadid()]

# Get index information on the small elements
@@ -412,7 +447,8 @@ function calc_mpi_mortar_flux!(surface_flux_values,
normal_direction = get_normal_direction(cache.mpi_mortars, i, j,
position, mortar)

calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations,
calc_mpi_mortar_flux!(fstar_primary, fstar_secondary, mesh,
nonconservative_terms, equations,
surface_integral, dg, cache,
mortar, position, normal_direction,
i, j)
Expand All @@ -433,14 +469,15 @@ function calc_mpi_mortar_flux!(surface_flux_values,

mpi_mortar_fluxes_to_elements!(surface_flux_values,
mesh, equations, mortar_l2, dg, cache,
mortar, fstar, u_buffer, fstar_tmp)
mortar, fstar_primary, fstar_secondary, u_buffer,
fstar_tmp)
end

return nothing
end

# Inlined version of the mortar flux computation on small elements for conservation laws
@inline function calc_mpi_mortar_flux!(fstar,
@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
mesh::Union{ParallelP4estMesh{3},
ParallelT8codeMesh{3}},
nonconservative_terms::False, equations,
@@ -456,16 +493,48 @@ end
flux = surface_flux(u_ll, u_rr, normal_direction, equations)

# Copy flux to buffer
set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index,
set_node_vars!(fstar_primary, flux, equations, dg, i_node_index, j_node_index,
position_index)
set_node_vars!(fstar_secondary, flux, equations, dg, i_node_index, j_node_index,
position_index)
end

# Inlined version of the mortar flux computation on small elements for non-conservative equations
@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
mesh::Union{ParallelP4estMesh{3},
ParallelT8codeMesh{3}},
nonconservative_terms::True, equations,
surface_integral, dg::DG, cache,
mortar_index, position_index, normal_direction,
i_node_index, j_node_index)
@unpack u = cache.mpi_mortars
surface_flux, nonconservative_flux = surface_integral.surface_flux

u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index,
j_node_index, mortar_index)

flux = surface_flux(u_ll, u_rr, normal_direction, equations)
noncons_flux_primary = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
noncons_flux_secondary = nonconservative_flux(u_rr, u_ll, normal_direction,
equations)

for v in eachvariable(equations)
fstar_primary[v, i_node_index, j_node_index, position_index] = flux[v] +
0.5f0 *
noncons_flux_primary[v]
fstar_secondary[v, i_node_index, j_node_index, position_index] = flux[v] +
0.5f0 *
noncons_flux_secondary[v]
end
end
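Here both contributions are computed at once with the same normal but swapped states: `fstar_primary` later fills the small elements' `surface_flux_values`, while `fstar_secondary` is the buffer projected to the large element in `mpi_mortar_fluxes_to_elements!` below. A toy value check with a hypothetical non-symmetric term (illustration only):

```julia
phi(u_ll, u_rr, n) = u_ll * (u_rr - u_ll) * n   # toy non-symmetric term

u_ll, u_rr, n = 1.0, 3.0, 1.0
phi(u_ll, u_rr, n)   #  2.0 -> `noncons_flux_primary`,   goes into `fstar_primary`
phi(u_rr, u_ll, n)   # -6.0 -> `noncons_flux_secondary`, goes into `fstar_secondary`
```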

@inline function mpi_mortar_fluxes_to_elements!(surface_flux_values,
mesh::Union{ParallelP4estMesh{3},
ParallelT8codeMesh{3}},
equations,
mortar_l2::LobattoLegendreMortarL2,
dg::DGSEM, cache, mortar, fstar,
dg::DGSEM, cache, mortar, fstar_primary,
fstar_secondary,
u_buffer, fstar_tmp)
@unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
index_range = eachnode(dg)
@@ -487,22 +556,22 @@ end
# Project small fluxes to large element.
multiply_dimensionwise!(u_buffer,
mortar_l2.reverse_lower, mortar_l2.reverse_lower,
view(fstar, .., 1),
view(fstar_secondary, .., 1),
fstar_tmp)
add_multiply_dimensionwise!(u_buffer,
mortar_l2.reverse_upper,
mortar_l2.reverse_lower,
view(fstar, .., 2),
view(fstar_secondary, .., 2),
fstar_tmp)
add_multiply_dimensionwise!(u_buffer,
mortar_l2.reverse_lower,
mortar_l2.reverse_upper,
view(fstar, .., 3),
view(fstar_secondary, .., 3),
fstar_tmp)
add_multiply_dimensionwise!(u_buffer,
mortar_l2.reverse_upper,
mortar_l2.reverse_upper,
view(fstar, .., 4),
view(fstar_secondary, .., 4),
fstar_tmp)
# The flux is calculated in the outward direction of the small elements,
# so the sign must be switched to get the flux in outward direction
@@ -536,10 +605,10 @@ end
for j in eachnode(dg)
for i in eachnode(dg)
for v in eachvariable(equations)
surface_flux_values[v, i, j, small_direction, element] = fstar[v,
i,
j,
position]
surface_flux_values[v, i, j, small_direction, element] = fstar_primary[v,
i,
j,
position]
end
end
end
74 changes: 74 additions & 0 deletions test/test_mpi_p4est_3d.jl
@@ -235,6 +235,80 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "p4est_3d_dgsem")
@test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
end
end

@trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl" begin
@test_trixi_include(joinpath(EXAMPLES_DIR,
"elixir_mhd_alfven_wave_nonconforming.jl"),
l2=[
0.0001788543743594658,
0.000624334205581902,
0.00022892869974368887,
0.0007223464581156573,
0.0006651366626523314,
0.0006287275014743352,
0.000344484339916008,
0.0007179788287557142,
8.632896980651243e-7
],
linf=[
0.0010730565632763867,
0.004596749809344033,
0.0013235269262853733,
0.00468874234888117,
0.004719267084104306,
0.004228339352211896,
0.0037503625505571625,
0.005104176909383168,
9.738081186490818e-6
],
tspan=(0.0, 0.25),
coverage_override=(trees_per_dimension = (1, 1, 1),))
# Ensure that we do not have excessive memory allocations
# (e.g., from type instabilities)
let
t = sol.t[end]
u_ode = sol.u[end]
du_ode = similar(u_ode)
@test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
end
end

# Same test as above but with only one tree in the mesh
# We use it to test meshes with elements of different size in each partition
@trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl with one tree" begin
@test_trixi_include(joinpath(EXAMPLES_DIR,
"elixir_mhd_alfven_wave_nonconforming.jl"),
l2=[
0.0019054118500017054,
0.006957977226608083,
0.003429930594167365,
0.009051598556176287,
0.0077261662742688425,
0.008210851821439208,
0.003763030674412298,
0.009175470744760567,
2.881690753923244e-5
],
linf=[
0.010983704624623503,
0.04584128974425262,
0.02022630484954286,
0.04851342295826149,
0.040710154751363525,
0.044722299260292586,
0.036591209423654236,
0.05701669133068068,
0.00024182906501186622
],
tspan=(0.0, 0.25), trees_per_dimension=(1, 1, 1),
coverage_override=(trees_per_dimension = (1, 1, 1),))
# Ensure that we do not have excessive memory allocations
# (e.g., from type instabilities)
let
t = sol.t[end]
u_ode = sol.u[end]
du_ode = similar(u_ode)
@test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
end
end
end # P4estMesh MPI
