diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl
index fd501a7257c..85880380c61 100644
--- a/src/callbacks_step/analysis_dg3d.jl
+++ b/src/callbacks_step/analysis_dg3d.jl
@@ -376,6 +376,11 @@ function analyze(::Val{:linf_divb}, du, u, t,
         end
     end
 
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        linf_divb = MPI.Allreduce!(Ref(linf_divb), Base.max, mpi_comm())[]
+    end
+
     return linf_divb
 end
 
@@ -412,6 +417,11 @@ function analyze(::Val{:linf_divb}, du, u, t,
         end
     end
 
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        linf_divb = MPI.Allreduce!(Ref(linf_divb), Base.max, mpi_comm())[]
+    end
+
     return linf_divb
 end
 end # @muladd
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index 70a616367cd..285f0f62de6 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -72,10 +72,12 @@ function integrate_via_indices(func::Func, u,
     @unpack weights = dg.basis
 
     # Initialize integral with zeros of the right shape
-    # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the
-    # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096.
-    integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, 1,
-                         equations, dg, args...))
+    # Pass `zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg), 1)`
+    # to `func` since `u` might be empty, if the current rank has no elements.
+    # See also https://github.com/trixi-framework/Trixi.jl/issues/1096, and
+    # https://github.com/trixi-framework/Trixi.jl/pull/2126/files/7cbc57cfcba93e67353566e10fce1f3edac27330#r1814483243.
+    integral = zero(func(zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg),
+                               nnodes(dg), 1), 1, 1, 1, 1, equations, dg, args...))
     volume = zero(real(mesh))
 
     # Use quadrature to numerically integrate over entire domain
diff --git a/src/callbacks_step/glm_speed_dg.jl b/src/callbacks_step/glm_speed_dg.jl
index 302aae356ab..4d9e4eee23f 100644
--- a/src/callbacks_step/glm_speed_dg.jl
+++ b/src/callbacks_step/glm_speed_dg.jl
@@ -13,6 +13,14 @@ function calc_dt_for_cleaning_speed(cfl::Real, mesh,
     # Cartesian meshes
     max_scaled_speed_for_c_h = maximum(cache.elements.inverse_jacobian) * ndims(equations)
+
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        max_scaled_speed_for_c_h = MPI.Allreduce!(Ref(max_scaled_speed_for_c_h),
+                                                  Base.max,
+                                                  mpi_comm())[]
+    end
+
     # OBS! This depends on the implementation details of the StepsizeCallback and needs to be adapted
     # as well if that callback changes.
     return cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h)
@@ -29,6 +37,12 @@ function calc_dt_for_cleaning_speed(cfl::Real, mesh,
     # Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations.
     max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations)
 
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        max_scaled_speed_for_c_h = MPI.Allreduce!(Ref(max_scaled_speed_for_c_h),
+                                                  Base.max,
+                                                  mpi_comm())[]
+    end
     # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by
     # `polydeg+1`. This is because `nnodes(dg)` returns the total number of
     # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns
diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
index 3daca10e821..e51836e8a51 100644
--- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
@@ -267,6 +267,40 @@ end
     end
 end
 
+# Inlined version of the interface flux computation for non-conservative equations
+@inline function calc_mpi_interface_flux!(surface_flux_values,
+                                          mesh::Union{ParallelP4estMesh{3},
+                                                      ParallelT8codeMesh{3}},
+                                          nonconservative_terms::True, equations,
+                                          surface_integral, dg::DG, cache,
+                                          interface_index, normal_direction,
+                                          interface_i_node_index,
+                                          interface_j_node_index, local_side,
+                                          surface_i_node_index, surface_j_node_index,
+                                          local_direction_index, local_element_index)
+    @unpack u = cache.mpi_interfaces
+    surface_flux, nonconservative_flux = surface_integral.surface_flux
+
+    u_ll, u_rr = get_surface_node_vars(u, equations, dg,
+                                       interface_i_node_index, interface_j_node_index,
+                                       interface_index)
+
+    # Compute flux and non-conservative term for this side of the interface
+    if local_side == 1
+        flux_ = surface_flux(u_ll, u_rr, normal_direction, equations)
+        noncons_flux_ = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
+    else # local_side == 2
+        flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations)
+        noncons_flux_ = -nonconservative_flux(u_rr, u_ll, -normal_direction, equations)
+    end
+
+    for v in eachvariable(equations)
+        surface_flux_values[v, surface_i_node_index, surface_j_node_index,
+                            local_direction_index, local_element_index] = flux_[v] +
+                                                                          0.5f0 * noncons_flux_[v]
+    end
+end
+
 function prolong2mpimortars!(cache, u,
                              mesh::Union{ParallelP4estMesh{3},
                                          ParallelT8codeMesh{3}}, equations,
@@ -384,12 +418,13 @@
                                surface_integral, dg::DG, cache)
     @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
     @unpack contravariant_vectors = cache.elements
-    @unpack fstar_primary_threaded, fstar_tmp_threaded = cache
+    @unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache
     index_range = eachnode(dg)
 
     @threaded for mortar in eachmpimortar(dg, cache)
         # Choose thread-specific pre-allocated container
-        fstar = fstar_primary_threaded[Threads.threadid()]
+        fstar_primary = fstar_primary_threaded[Threads.threadid()]
+        fstar_secondary = fstar_secondary_threaded[Threads.threadid()]
         fstar_tmp = fstar_tmp_threaded[Threads.threadid()]
 
         # Get index information on the small elements
@@ -412,7 +447,8 @@
                 normal_direction = get_normal_direction(cache.mpi_mortars, i, j,
                                                         position, mortar)
 
-                calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations,
+                calc_mpi_mortar_flux!(fstar_primary, fstar_secondary, mesh,
+                                      nonconservative_terms, equations,
                                       surface_integral, dg, cache,
                                       mortar, position, normal_direction,
                                       i, j)
@@ -433,14 +469,15 @@
         mpi_mortar_fluxes_to_elements!(surface_flux_values,
                                        mesh, equations, mortar_l2, dg, cache,
-                                       mortar, fstar, u_buffer, fstar_tmp)
+                                       mortar, fstar_primary, fstar_secondary, u_buffer,
+                                       fstar_tmp)
     end
 
     return nothing
 end
 
 # Inlined version of the mortar flux computation on small elements for conservation laws
-@inline function calc_mpi_mortar_flux!(fstar,
+@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
                                        mesh::Union{ParallelP4estMesh{3},
                                                    ParallelT8codeMesh{3}},
                                        nonconservative_terms::False, equations,
@@ -456,16 +493,48 @@
     flux = surface_flux(u_ll, u_rr, normal_direction, equations)
 
     # Copy flux to buffer
-    set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index,
+    set_node_vars!(fstar_primary, flux, equations, dg, i_node_index, j_node_index,
+                   position_index)
+    set_node_vars!(fstar_secondary, flux, equations, dg, i_node_index, j_node_index,
                    position_index)
 end
 
+# Inlined version of the mortar flux computation on small elements for non-conservative equations
+@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
+                                       mesh::Union{ParallelP4estMesh{3},
+                                                   ParallelT8codeMesh{3}},
+                                       nonconservative_terms::True, equations,
+                                       surface_integral, dg::DG, cache,
+                                       mortar_index, position_index, normal_direction,
+                                       i_node_index, j_node_index)
+    @unpack u = cache.mpi_mortars
+    surface_flux, nonconservative_flux = surface_integral.surface_flux
+
+    u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index,
+                                       j_node_index, mortar_index)
+
+    flux = surface_flux(u_ll, u_rr, normal_direction, equations)
+    noncons_flux_primary = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
+    noncons_flux_secondary = nonconservative_flux(u_rr, u_ll, normal_direction,
+                                                  equations)
+
+    for v in eachvariable(equations)
+        fstar_primary[v, i_node_index, j_node_index, position_index] = flux[v] +
+                                                                       0.5f0 *
+                                                                       noncons_flux_primary[v]
+        fstar_secondary[v, i_node_index, j_node_index, position_index] = flux[v] +
+                                                                         0.5f0 *
+                                                                         noncons_flux_secondary[v]
+    end
+end
+
 @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values,
                                                 mesh::Union{ParallelP4estMesh{3},
                                                             ParallelT8codeMesh{3}},
                                                 equations,
                                                 mortar_l2::LobattoLegendreMortarL2,
-                                                dg::DGSEM, cache, mortar, fstar,
+                                                dg::DGSEM, cache, mortar, fstar_primary,
+                                                fstar_secondary,
                                                 u_buffer, fstar_tmp)
     @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
     index_range = eachnode(dg)
@@ -487,22 +556,22 @@
         # Project small fluxes to large element.
         multiply_dimensionwise!(u_buffer,
                                 mortar_l2.reverse_lower, mortar_l2.reverse_lower,
-                                view(fstar, .., 1),
+                                view(fstar_secondary, .., 1),
                                 fstar_tmp)
         add_multiply_dimensionwise!(u_buffer,
                                     mortar_l2.reverse_upper, mortar_l2.reverse_lower,
-                                    view(fstar, .., 2),
+                                    view(fstar_secondary, .., 2),
                                     fstar_tmp)
         add_multiply_dimensionwise!(u_buffer,
                                     mortar_l2.reverse_lower, mortar_l2.reverse_upper,
-                                    view(fstar, .., 3),
+                                    view(fstar_secondary, .., 3),
                                     fstar_tmp)
         add_multiply_dimensionwise!(u_buffer,
                                     mortar_l2.reverse_upper, mortar_l2.reverse_upper,
-                                    view(fstar, .., 4),
+                                    view(fstar_secondary, .., 4),
                                     fstar_tmp)
         # The flux is calculated in the outward direction of the small elements,
         # so the sign must be switched to get the flux in outward direction
@@ -536,10 +605,10 @@
             for j in eachnode(dg)
                 for i in eachnode(dg)
                     for v in eachvariable(equations)
-                        surface_flux_values[v, i, j, small_direction, element] = fstar[v,
-                                                                                       i,
-                                                                                       j,
-                                                                                       position]
+                        surface_flux_values[v, i, j, small_direction, element] = fstar_primary[v,
+                                                                                               i,
+                                                                                               j,
+                                                                                               position]
                     end
                 end
             end
diff --git a/test/test_mpi_p4est_3d.jl b/test/test_mpi_p4est_3d.jl
index d6018e51c81..663732b4ded 100644
--- a/test/test_mpi_p4est_3d.jl
+++ b/test/test_mpi_p4est_3d.jl
@@ -235,6 +235,80 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "p4est_3d_dgsem")
             @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
         end
     end
+
+    @trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                     "elixir_mhd_alfven_wave_nonconforming.jl"),
+                            l2=[
+                                0.0001788543743594658,
+                                0.000624334205581902,
+                                0.00022892869974368887,
+                                0.0007223464581156573,
+                                0.0006651366626523314,
+                                0.0006287275014743352,
+                                0.000344484339916008,
+                                0.0007179788287557142,
+                                8.632896980651243e-7
+                            ],
+                            linf=[
+                                0.0010730565632763867,
+                                0.004596749809344033,
+                                0.0013235269262853733,
+                                0.00468874234888117,
+                                0.004719267084104306,
+                                0.004228339352211896,
+                                0.0037503625505571625,
+                                0.005104176909383168,
+                                9.738081186490818e-6
+                            ],
+                            tspan=(0.0, 0.25),
+                            coverage_override=(trees_per_dimension = (1, 1, 1),))
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
+
+    # Same test as above but with only one tree in the mesh
+    # We use it to test meshes with elements of different size in each partition
+    @trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl with one tree" begin
+        @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                     "elixir_mhd_alfven_wave_nonconforming.jl"),
+                            l2=[
+                                0.0019054118500017054,
+                                0.006957977226608083,
+                                0.003429930594167365,
+                                0.009051598556176287,
+                                0.0077261662742688425,
+                                0.008210851821439208,
+                                0.003763030674412298,
+                                0.009175470744760567,
+                                2.881690753923244e-5
+                            ],
+                            linf=[
+                                0.010983704624623503,
+                                0.04584128974425262,
+                                0.02022630484954286,
+                                0.04851342295826149,
+                                0.040710154751363525,
+                                0.044722299260292586,
+                                0.036591209423654236,
+                                0.05701669133068068,
+                                0.00024182906501186622
+                            ],
+                            tspan=(0.0, 0.25), trees_per_dimension=(1, 1, 1),
+                            coverage_override=(trees_per_dimension = (1, 1, 1),))
+        # Ensure that we do not have excessive memory allocations
+        # (e.g., from type instabilities)
+        let
+            t = sol.t[end]
+            u_ode = sol.u[end]
+            du_ode = similar(u_ode)
+            @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+        end
+    end
 end
 end # P4estMesh MPI
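
Several hunks above (in analysis_dg3d.jl and glm_speed_dg.jl) repeat the same pattern: each MPI rank first computes a purely local maximum (`linf_divb` or `max_scaled_speed_for_c_h`) and then combines it across all ranks with an all-reduce, passing `Base.max` explicitly because Trixi.jl defines its own `max` method (see the comment referenced in `src/auxiliary/math.jl`). Below is a minimal, standalone MPI.jl sketch of that reduction pattern; it is not part of the patch, and the script name and the stand-in local value are made up for illustration.

# reduce_max_sketch.jl -- hypothetical standalone example, not part of Trixi.jl
using MPI

MPI.Init()
comm = MPI.COMM_WORLD

# Stand-in for a locally computed maximum such as `linf_divb`:
# here each rank simply contributes its rank number plus one.
local_max = float(MPI.Comm_rank(comm) + 1)

# Combine the local maxima into the global maximum, available on every rank.
# Outside Trixi the plain `max` can be passed as the reduction operator.
global_max = MPI.Allreduce(local_max, max, comm)

if MPI.Comm_rank(comm) == 0
    println("global maximum: ", global_max)
end

MPI.Finalize()

Run with, e.g., `mpiexecjl -n 4 julia reduce_max_sketch.jl`. The patch itself uses the in-place form `MPI.Allreduce!(Ref(x), Base.max, mpi_comm())[]`, which performs the same reduction on a one-element buffer and reads the result back out of the `Ref`.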