From 8743712d824337afbf99601ad5531f7040d95852 Mon Sep 17 00:00:00 2001 From: Daniel Doehring Date: Thu, 31 Oct 2024 17:44:41 +0100 Subject: [PATCH] Thread-Parallelize blended DG-FV (#2138) * Prototype SCHG 2D thread-para * remove not-needed datastructures * optimize * 1D, 3D * dgmulti * Apply suggestions from code review Co-authored-by: Hendrik Ranocha --------- Co-authored-by: Hendrik Ranocha --- src/solvers/dgmulti/shock_capturing.jl | 77 ++++++++------------------ src/solvers/dgsem_tree/dg.jl | 27 --------- src/solvers/dgsem_tree/dg_1d.jl | 52 ++++++++--------- src/solvers/dgsem_tree/dg_2d.jl | 49 ++++++++-------- src/solvers/dgsem_tree/dg_3d.jl | 52 ++++++++--------- 5 files changed, 90 insertions(+), 167 deletions(-) diff --git a/src/solvers/dgmulti/shock_capturing.jl b/src/solvers/dgmulti/shock_capturing.jl index 99261b82edf..0a4a3623206 100644 --- a/src/solvers/dgmulti/shock_capturing.jl +++ b/src/solvers/dgmulti/shock_capturing.jl @@ -2,9 +2,6 @@ function create_cache(mesh::DGMultiMesh{NDIMS}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGMultiFluxDiff{<:GaussSBP}, RealT, uEltype) where {NDIMS} - element_ids_dg = Int[] - element_ids_dgfv = Int[] - # build element to element (element_to_element_connectivity) connectivity for smoothing of # shock capturing parameters. face_to_face_connectivity = mesh.md.FToF # num_faces x num_elements matrix @@ -31,8 +28,7 @@ function create_cache(mesh::DGMultiMesh{NDIMS}, equations, sparsity_pattern = sum(map(A -> abs.(A)', sparse_hybridized_SBP_operators)) .> 100 * eps() - return (; element_ids_dg, element_ids_dgfv, - sparse_hybridized_SBP_operators, sparsity_pattern, + return (; sparse_hybridized_SBP_operators, sparsity_pattern, element_to_element_connectivity) end @@ -151,70 +147,41 @@ function apply_smoothing!(mesh::DGMultiMesh, alpha, alpha_tmp, dg::DGMulti, cach end end -# pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) -# -# Given blending factors `alpha` and the solver `dg`, fill -# `element_ids_dg` with the IDs of elements using a pure DG scheme and -# `element_ids_dgfv` with the IDs of elements using a blended DG-FV scheme. -function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, - mesh::DGMultiMesh, dg::DGMulti) - empty!(element_ids_dg) - empty!(element_ids_dgfv) - # For `Float64`, this gives 1.8189894035458565e-12 - # For `Float32`, this gives 1.1920929f-5 - RealT = eltype(alpha) - atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0)) - - for element in eachelement(mesh, dg) - # Clip blending factor for values close to zero (-> pure DG) - dg_only = isapprox(alpha[element], 0, atol = atol) - if dg_only - push!(element_ids_dg, element) - else - push!(element_ids_dgfv, element) - end - end - - return nothing -end - function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGMultiFluxDiff, cache) - (; element_ids_dg, element_ids_dgfv) = cache (; volume_flux_dg, volume_flux_fv, indicator) = volume_integral # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh, dg) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, - equations, volume_flux_dg, dg, cache) - end + # For `Float64`, this gives 1.8189894035458565e-12 + # For `Float32`, this gives 1.1920929f-5 + RealT = eltype(alpha) + atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0)) - # Loop over blended DG-FV elements, blend the high and low order RHS contributions - # via `rhs_high * (1 - alpha) + rhs_low * (alpha)`. - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] + @threaded for element in eachelement(mesh, dg) alpha_element = alpha[element] + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha_element, 0, atol = atol) - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) - - # Calculate "FV" low order volume integral contribution - low_order_flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_fv, dg, cache, alpha_element) + if dg_only + flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + equations, volume_flux_dg, dg, cache) + else + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) + + # Calculate "FV" low order volume integral contribution + low_order_flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, alpha_element) + end end return nothing diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index febfb0b121f..fd9728168d8 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -15,33 +15,6 @@ function reset_du!(du, dg, cache) return du end -# pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) -# -# Given blending factors `alpha` and the solver `dg`, fill -# `element_ids_dg` with the IDs of elements using a pure DG scheme and -# `element_ids_dgfv` with the IDs of elements using a blended DG-FV scheme. -function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg::DG, - cache) - empty!(element_ids_dg) - empty!(element_ids_dgfv) - # For `Float64`, this gives 1.8189894035458565e-12 - # For `Float32`, this gives 1.1920929f-5 - RealT = eltype(alpha) - atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0)) - - for element in eachelement(dg, cache) - # Clip blending factor for values close to zero (-> pure DG) - dg_only = isapprox(alpha[element], 0, atol = atol) - if dg_only - push!(element_ids_dg, element) - else - push!(element_ids_dgfv, element) - end - end - - return nothing -end - function volume_jacobian(element, mesh::TreeMesh, cache) return inv(cache.elements.inverse_jacobian[element])^ndims(mesh) end diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index d9a8cc8bc6d..b5a67027541 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -42,9 +42,6 @@ end function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] - cache = create_cache(mesh, equations, VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), dg, uEltype) @@ -55,8 +52,7 @@ function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) for _ in 1:Threads.nthreads()] - return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, - fstar1_R_threaded) + return (; cache..., fstar1_L_threaded, fstar1_R_threaded) end function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, @@ -256,37 +252,35 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, - equations, - volume_flux_dg, dg, cache) - end - - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] + # For `Float64`, this gives 1.8189894035458565e-12 + # For `Float32`, this gives 1.1920929f-5 + RealT = eltype(alpha) + atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0)) + @threaded for element in eachelement(dg, cache) alpha_element = alpha[element] - - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, - equations, - volume_flux_dg, dg, cache, 1 - alpha_element) - - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha_element, 0, atol = atol) + + if dg_only + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache) + else + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) + + # Calculate FV volume integral contribution + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end end return nothing diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 03f28659f5e..0585093b7a4 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -47,9 +47,6 @@ end function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] - cache = create_cache(mesh, equations, VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), dg, uEltype) @@ -66,7 +63,7 @@ function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMe fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg) + 1) for _ in 1:Threads.nthreads()] - return (; cache..., element_ids_dg, element_ids_dgfv, + return (; cache..., fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded) end @@ -338,37 +335,35 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end - - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] + # For `Float64`, this gives 1.8189894035458565e-12 + # For `Float32`, this gives 1.1920929f-5 + RealT = eltype(alpha) + atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0)) + @threaded for element in eachelement(dg, cache) alpha_element = alpha[element] + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha_element, 0, atol = atol) - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) - - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) + if dg_only + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache) + else + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) + + # Calculate FV volume integral contribution + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end end return nothing diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 85150bc3040..4b3bc81ff8b 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -47,9 +47,6 @@ function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] - cache = create_cache(mesh, equations, VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), dg, uEltype) @@ -76,8 +73,7 @@ function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, nnodes(dg), nnodes(dg) + 1) for _ in 1:Threads.nthreads()] - return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, - fstar1_R_threaded, + return (; cache..., fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded) end @@ -386,37 +382,35 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end - - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] + # For `Float64`, this gives 1.8189894035458565e-12 + # For `Float32`, this gives 1.1920929f-5 + RealT = eltype(alpha) + atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0)) + @threaded for element in eachelement(dg, cache) alpha_element = alpha[element] - - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) - - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha_element, 0, atol = atol) + + if dg_only + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache) + else + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) + + # Calculate FV volume integral contribution + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end end return nothing