diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl
index 5aabbf7ac60..0632e3d3d8e 100644
--- a/src/solvers/dgsem_p4est/dg_3d.jl
+++ b/src/solvers/dgsem_p4est/dg_3d.jl
@@ -10,9 +10,14 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
                       mortar_l2::LobattoLegendreMortarL2, uEltype)
     # TODO: Taal compare performance of different types
-    fstar_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2),
-                                        nnodes(mortar_l2), 4)
-                      for _ in 1:Threads.nthreads()]
+    fstar_primary_threaded = [Array{uEltype, 4}(undef, nvariables(equations),
+                                                nnodes(mortar_l2),
+                                                nnodes(mortar_l2), 4)
+                              for _ in 1:Threads.nthreads()]
+    fstar_secondary_threaded = [Array{uEltype, 4}(undef, nvariables(equations),
+                                                  nnodes(mortar_l2),
+                                                  nnodes(mortar_l2), 4)
+                                for _ in 1:Threads.nthreads()]
     fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations),
                                             nnodes(mortar_l2), nnodes(mortar_l2))
@@ -21,7 +26,7 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations,
                                     nnodes(mortar_l2))
                   for _ in 1:Threads.nthreads()]
 
-    (; fstar_threaded, fstar_tmp_threaded, u_threaded)
+    (; fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded, u_threaded)
 end
 
 # index_to_start_step_3d(index::Symbol, index_range)
@@ -521,12 +526,13 @@ function calc_mortar_flux!(surface_flux_values,
                            surface_integral, dg::DG, cache)
     @unpack neighbor_ids, node_indices = cache.mortars
     @unpack contravariant_vectors = cache.elements
-    @unpack fstar_threaded, fstar_tmp_threaded = cache
+    @unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache
     index_range = eachnode(dg)
 
     @threaded for mortar in eachmortar(dg, cache)
         # Choose thread-specific pre-allocated container
-        fstar = fstar_threaded[Threads.threadid()]
+        fstar_primary = fstar_primary_threaded[Threads.threadid()]
+        fstar_secondary = fstar_secondary_threaded[Threads.threadid()]
         fstar_tmp = fstar_tmp_threaded[Threads.threadid()]
 
         # Get index information on the small elements
@@ -555,7 +561,8 @@ function calc_mortar_flux!(surface_flux_values,
                                                     i_small, j_small, k_small,
                                                     element)
 
-            calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations,
+            calc_mortar_flux!(fstar_primary, fstar_secondary, mesh,
+                              nonconservative_terms, equations,
                               surface_integral, dg, cache,
                               mortar, position, normal_direction,
                               i, j)
@@ -581,14 +588,15 @@ function calc_mortar_flux!(surface_flux_values,
         # "mortar_fluxes_to_elements!" instead.
         mortar_fluxes_to_elements!(surface_flux_values,
                                    mesh, equations, mortar_l2, dg, cache,
-                                   mortar, fstar, u_buffer, fstar_tmp)
+                                   mortar, fstar_primary, fstar_secondary, u_buffer,
+                                   fstar_tmp)
     end
 
     return nothing
 end
 
 # Inlined version of the mortar flux computation on small elements for conservation fluxes
-@inline function calc_mortar_flux!(fstar,
+@inline function calc_mortar_flux!(fstar_primary, fstar_secondary,
                                    mesh::Union{P4estMesh{3}, T8codeMesh{3}},
                                    nonconservative_terms::False, equations,
                                    surface_integral, dg::DG, cache,
@@ -603,13 +611,15 @@ end
     flux = surface_flux(u_ll, u_rr, normal_direction, equations)
 
     # Copy flux to buffer
-    set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index,
+    set_node_vars!(fstar_primary, flux, equations, dg, i_node_index, j_node_index,
+                   position_index)
+    set_node_vars!(fstar_secondary, flux, equations, dg, i_node_index, j_node_index,
                    position_index)
 end
 
 # Inlined version of the mortar flux computation on small elements for conservation fluxes
 # with nonconservative terms
-@inline function calc_mortar_flux!(fstar,
+@inline function calc_mortar_flux!(fstar_primary, fstar_secondary,
                                    mesh::Union{P4estMesh{3}, T8codeMesh{3}},
                                    nonconservative_terms::True, equations,
                                    surface_integral, dg::DG, cache,
@@ -627,11 +637,19 @@ end
     # Compute nonconservative flux and add it to the flux scaled by a factor of 0.5 based on
     # the interpretation of global SBP operators coupled discontinuously via
     # central fluxes/SATs
-    noncons = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
-    flux_plus_noncons = flux + 0.5f0 * noncons
+    noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
+    noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, equations)
+    flux_plus_noncons_primary = flux + 0.5f0 * noncons_primary
+    flux_plus_noncons_secondary = flux + 0.5f0 * noncons_secondary
 
     # Copy to buffer
-    set_node_vars!(fstar, flux_plus_noncons, equations, dg, i_node_index, j_node_index,
+    set_node_vars!(fstar_primary, flux_plus_noncons_primary, equations, dg,
+                   i_node_index,
+                   j_node_index,
+                   position_index)
+    set_node_vars!(fstar_secondary, flux_plus_noncons_secondary, equations, dg,
+                   i_node_index,
+                   j_node_index,
                    position_index)
 end
 
@@ -639,8 +657,8 @@ end
                                             mesh::Union{P4estMesh{3}, T8codeMesh{3}},
                                             equations,
                                             mortar_l2::LobattoLegendreMortarL2,
-                                            dg::DGSEM, cache, mortar, fstar, u_buffer,
-                                            fstar_tmp)
+                                            dg::DGSEM, cache, mortar, fstar_primary,
+                                            fstar_secondary, u_buffer, fstar_tmp)
     @unpack neighbor_ids, node_indices = cache.mortars
     index_range = eachnode(dg)
 
@@ -652,8 +670,10 @@ end
         element = neighbor_ids[position, mortar]
         for j in eachnode(dg), i in eachnode(dg)
             for v in eachvariable(equations)
-                surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j,
-                                                                               position]
+                surface_flux_values[v, i, j, small_direction, element] = fstar_primary[v,
+                                                                                       i,
+                                                                                       j,
+                                                                                       position]
             end
         end
     end
@@ -661,19 +681,19 @@ end
     # Project small fluxes to large element.
     multiply_dimensionwise!(u_buffer,
                             mortar_l2.reverse_lower, mortar_l2.reverse_lower,
-                            view(fstar, .., 1),
+                            view(fstar_secondary, .., 1),
                             fstar_tmp)
     add_multiply_dimensionwise!(u_buffer,
                                 mortar_l2.reverse_upper, mortar_l2.reverse_lower,
-                                view(fstar, .., 2),
+                                view(fstar_secondary, .., 2),
                                 fstar_tmp)
     add_multiply_dimensionwise!(u_buffer,
                                 mortar_l2.reverse_lower, mortar_l2.reverse_upper,
-                                view(fstar, .., 3),
+                                view(fstar_secondary, .., 3),
                                 fstar_tmp)
     add_multiply_dimensionwise!(u_buffer,
                                 mortar_l2.reverse_upper, mortar_l2.reverse_upper,
-                                view(fstar, .., 4),
+                                view(fstar_secondary, .., 4),
                                 fstar_tmp)
 
     # The flux is calculated in the outward direction of the small elements,
diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl
index 3f286ca01fc..b1204e5a15c 100644
--- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl
@@ -271,7 +271,8 @@ end
                                             mesh::P4estMesh{3},
                                             equations::AbstractEquationsParabolic,
                                             mortar_l2::LobattoLegendreMortarL2,
-                                            dg::DGSEM, cache, mortar, fstar, u_buffer,
+                                            dg::DGSEM, cache, mortar, fstar_primary,
+                                            fstar_secondary, u_buffer,
                                             fstar_tmp)
     @unpack neighbor_ids, node_indices = cache.mortars
     index_range = eachnode(dg)
@@ -283,8 +284,10 @@ end
         element = neighbor_ids[position, mortar]
         for j in eachnode(dg), i in eachnode(dg)
             for v in eachvariable(equations)
-                surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j,
-                                                                               position]
+                surface_flux_values[v, i, j, small_direction, element] = fstar_primary[v,
+                                                                                       i,
+                                                                                       j,
+                                                                                       position]
             end
         end
     end
@@ -292,19 +295,19 @@ end
     # Project small fluxes to large element.
     multiply_dimensionwise!(u_buffer,
                             mortar_l2.reverse_lower, mortar_l2.reverse_lower,
-                            view(fstar, .., 1),
+                            view(fstar_secondary, .., 1),
                             fstar_tmp)
     add_multiply_dimensionwise!(u_buffer,
                                 mortar_l2.reverse_upper, mortar_l2.reverse_lower,
-                                view(fstar, .., 2),
+                                view(fstar_secondary, .., 2),
                                 fstar_tmp)
     add_multiply_dimensionwise!(u_buffer,
                                 mortar_l2.reverse_lower, mortar_l2.reverse_upper,
-                                view(fstar, .., 3),
+                                view(fstar_secondary, .., 3),
                                 fstar_tmp)
     add_multiply_dimensionwise!(u_buffer,
                                 mortar_l2.reverse_upper, mortar_l2.reverse_upper,
-                                view(fstar, .., 4),
+                                view(fstar_secondary, .., 4),
                                 fstar_tmp)
 
     # The flux is calculated in the outward direction of the small elements,
@@ -788,12 +791,12 @@ function calc_mortar_flux_divergence!(surface_flux_values,
                                       surface_integral, dg::DG, cache)
     @unpack neighbor_ids, node_indices = cache.mortars
     @unpack contravariant_vectors = cache.elements
-    @unpack fstar_threaded, fstar_tmp_threaded = cache
+    @unpack fstar_primary_threaded, fstar_tmp_threaded = cache
     index_range = eachnode(dg)
 
     @threaded for mortar in eachmortar(dg, cache)
         # Choose thread-specific pre-allocated container
-        fstar = fstar_threaded[Threads.threadid()]
+        fstar = fstar_primary_threaded[Threads.threadid()]
         fstar_tmp = fstar_tmp_threaded[Threads.threadid()]
 
         # Get index information on the small elements
@@ -842,7 +845,7 @@ function calc_mortar_flux_divergence!(surface_flux_values,
         # this reuses the hyperbolic version of `mortar_fluxes_to_elements!`
         mortar_fluxes_to_elements!(surface_flux_values,
                                    mesh, equations, mortar_l2, dg, cache,
-                                   mortar, fstar, u_buffer, fstar_tmp)
+                                   mortar, fstar, fstar, u_buffer, fstar_tmp)
     end
 
     return nothing
@@ -851,7 +854,7 @@ end
 # NOTE: Use analogy to "calc_mortar_flux!" for hyperbolic eqs with no nonconservative terms.
 # Reasoning: "calc_interface_flux!" for parabolic part is implemented as the version for
 #            hyperbolic terms with conserved terms only, i.e., no nonconservative terms.
-@inline function calc_mortar_flux!(fstar,
+@inline function calc_mortar_flux!(fstar_primary, fstar_secondary,
                                    mesh::P4estMesh{3},
                                    nonconservative_terms::False,
                                    equations::AbstractEquationsParabolic,
@@ -867,7 +870,9 @@ end
     # TODO: parabolic; only BR1 at the moment
     flux_ = 0.5f0 * (u_ll + u_rr)
     # Copy flux to buffer
-    set_node_vars!(fstar, flux_, equations, dg, i_node_index, j_node_index,
+    set_node_vars!(fstar_primary, flux_, equations, dg, i_node_index, j_node_index,
+                   position_index)
+    set_node_vars!(fstar_secondary, flux_, equations, dg, i_node_index, j_node_index,
                    position_index)
 end
 
diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
index 635c8dc795e..3daca10e821 100644
--- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl
@@ -384,12 +384,12 @@ function calc_mpi_mortar_flux!(surface_flux_values,
                                surface_integral, dg::DG, cache)
     @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
     @unpack contravariant_vectors = cache.elements
-    @unpack fstar_threaded, fstar_tmp_threaded = cache
+    @unpack fstar_primary_threaded, fstar_tmp_threaded = cache
     index_range = eachnode(dg)
 
     @threaded for mortar in eachmpimortar(dg, cache)
         # Choose thread-specific pre-allocated container
-        fstar = fstar_threaded[Threads.threadid()]
+        fstar = fstar_primary_threaded[Threads.threadid()]
         fstar_tmp = fstar_tmp_threaded[Threads.threadid()]
 
         # Get index information on the small elements
diff --git a/test/test_p4est_3d.jl b/test/test_p4est_3d.jl
index 5cec16300f8..3d2db528a14 100644
--- a/test/test_p4est_3d.jl
+++ b/test/test_p4est_3d.jl
@@ -547,16 +547,28 @@ end
 @trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR,
                                  "elixir_mhd_alfven_wave_nonconforming.jl"),
-                        l2=[0.00019018725889431733, 0.0006523517707148006,
-                            0.0002401595437705759, 0.0007796920661427565,
-                            0.0007095787460334334, 0.0006558819731628876,
-                            0.0003565026134076906, 0.0007904654548841712,
-                            9.437300326448332e-7],
-                        linf=[0.0012482306861187897, 0.006408776208178299,
-                              0.0016845452099629663, 0.0068711236542984555,
-                              0.004626581522263695, 0.006614624811393632,
-                              0.0030068344747734566, 0.008277825749754025,
-                              1.3475027166309006e-5],
+                        l2=[
+                            0.0001788543743594658,
+                            0.000624334205581902,
+                            0.00022892869974368887,
+                            0.0007223464581156573,
+                            0.0006651366626523314,
+                            0.0006287275014743352,
+                            0.000344484339916008,
+                            0.0007179788287557142,
+                            8.632896980651243e-7
+                        ],
+                        linf=[
+                            0.0010730565632763867,
+                            0.004596749809344033,
+                            0.0013235269262853733,
+                            0.00468874234888117,
+                            0.004719267084104306,
+                            0.004228339352211896,
+                            0.0037503625505571625,
+                            0.005104176909383168,
+                            9.738081186490818e-6
+                        ],
                         tspan=(0.0, 0.25),
                         coverage_override=(trees_per_dimension = (1, 1, 1),))
     # Ensure that we do not have excessive memory allocations
@@ -571,16 +583,28 @@ end
 @trixi_testset "elixir_mhd_shockcapturing_amr.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR,
                                  "elixir_mhd_shockcapturing_amr.jl"),
-                        l2=[0.006297229188267704, 0.006436347763092648,
-                            0.0071091348227321095, 0.00652953798427642,
-                            0.0206148702828057, 0.005561406556411695,
-                            0.007570747563696005, 0.005571060186513173,
-                            3.888176398720913e-6],
-                        linf=[0.20904050630623572, 0.1863002690612441,
-                              0.2347653795205547, 0.19430178062881898,
-                              0.6858488630270272, 0.15169972127018583,
-                              0.22431157058134898, 0.16823638722404644,
-                              0.0005208971463830214],
+                        l2=[
+                            0.0062973565893792004,
+                            0.006436273914579104,
+                            0.007112703307027178,
+                            0.006529650167358523,
+                            0.020607452343745017,
+                            0.005560993001492338,
+                            0.007576418168749763,
+                            0.0055721349394598635,
+                            3.8269125984310296e-6
+                        ],
+                        linf=[
+                            0.2090718196650192,
+                            0.1863884052971854,
+                            0.23475479927204168,
+                            0.19460789763442982,
+                            0.6859816363887359,
+                            0.15171474186273914,
+                            0.22404690260234983,
+                            0.16808957604979002,
+                            0.0005083795485317637
+                        ],
                         tspan=(0.0, 0.04),
                         coverage_override=(maxiters = 6, initial_refinement_level = 1,
                                            base_level = 1, max_level = 2))
@@ -597,26 +621,26 @@ end
 @trixi_testset "elixir_mhd_amr_entropy_bounded.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_amr_entropy_bounded.jl"),
                         l2=[
-                            0.005430176785094096,
-                            0.006185803468926062,
-                            0.012158513265762224,
-                            0.006185144232789619,
-                            0.03509140423905665,
-                            0.004968215426326584,
-                            0.006553519141867704,
-                            0.005008885124643863,
-                            5.165777182726578e-6
+                            0.005430006338127661,
+                            0.006186402899876596,
+                            0.012171513410597289,
+                            0.006181479343504159,
+                            0.035068817354117605,
+                            0.004967715666538709,
+                            0.006592173316509503,
+                            0.0050151140388451105,
+                            5.146547644807638e-6
                         ],
                         linf=[
-                            0.1864317840224794,
-                            0.2041246899193812,
-                            0.36992946717578445,
-                            0.2327158690965257,
-                            1.0368624176126007,
-                            0.1846308291826353,
-                            0.2062255411778191,
-                            0.18955666546331185,
-                            0.0005208969502913304
+                            0.18655204102670386,
+                            0.20397573777286138,
+                            0.3700839435299759,
+                            0.23329319876321034,
+                            1.0348619438460904,
+                            0.18462694496595722,
+                            0.20648634653698617,
+                            0.18947822281424997,
+                            0.0005083794158781671
                         ],
                         tspan=(0.0, 0.04),
                         coverage_override=(maxiters = 6, initial_refinement_level = 1,
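
Note (not part of the diff): the following self-contained Julia sketch illustrates why the mortar code above now carries two flux buffers. A symmetric surface flux gives the same value seen from either side of the mortar, but a nonconservative term is generally not symmetric in its two arguments, so the value stored for the small ("primary") elements, flux + 0.5 * noncons(u_ll, u_rr), differs from the value projected to the large ("secondary") element, flux + 0.5 * noncons(u_rr, u_ll). The functions toy_flux and toy_noncons below are hypothetical stand-ins for illustration only, not Trixi.jl API.

# Hypothetical scalar stand-ins for a symmetric surface flux and a
# nonsymmetric nonconservative term (illustration only, no Trixi.jl API).
toy_flux(u_ll, u_rr) = 0.5 * (u_ll + u_rr)      # symmetric in its arguments
toy_noncons(u_ll, u_rr) = u_ll * (u_rr - u_ll)  # NOT symmetric in its arguments

function toy_mortar_fluxes(u_ll, u_rr)
    flux = toy_flux(u_ll, u_rr)
    # Value stored for the small elements (arguments in primary order)
    fstar_primary = flux + 0.5 * toy_noncons(u_ll, u_rr)
    # Value projected to the large element (arguments swapped)
    fstar_secondary = flux + 0.5 * toy_noncons(u_rr, u_ll)
    return fstar_primary, fstar_secondary
end

println(toy_mortar_fluxes(1.0, 2.0))  # (2.0, 0.5): the two sides need separate buffers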
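
A second sketch, also outside the diff, mimics the multiply_dimensionwise!/add_multiply_dimensionwise! calls that project the four small-face values of fstar_secondary onto the large face. It assumes the usual tensor-product convention dest[i, j] = sum over (p, q) of A[i, p] * B[j, q] * src[p, q] for the 1D projection operators reverse_lower/reverse_upper; matrix sizes and random data are made up for illustration and this is not Trixi.jl's implementation.

using LinearAlgebra

# Toy tensor-product projection of four small mortar faces onto the large face.
n = 3                            # nodes per direction on a face
reverse_lower = rand(n, n)       # stand-ins for the 1D L2 projection operators
reverse_upper = rand(n, n)
fstar_secondary = rand(n, n, 4)  # one variable, values on the 4 small faces

# dest = A * src * transpose(B) realizes dest[i, j] = Σ_{p,q} A[i, p] * B[j, q] * src[p, q]
project(A, B, src) = A * src * transpose(B)

# Accumulate the contributions of the four small faces, mirroring the
# lower/lower, upper/lower, lower/upper, upper/upper combinations in the diff.
u_buffer = project(reverse_lower, reverse_lower, fstar_secondary[:, :, 1]) +
           project(reverse_upper, reverse_lower, fstar_secondary[:, :, 2]) +
           project(reverse_lower, reverse_upper, fstar_secondary[:, :, 3]) +
           project(reverse_upper, reverse_upper, fstar_secondary[:, :, 4])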