diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 00000000000..8518d202812 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1,8 @@ +# Use SciML style: https://github.com/SciML/SciMLStyle +style = "sciml" + +# Python style alignment. See https://github.com/domluna/JuliaFormatter.jl/pull/732. +yas_style_nesting = true + +# Align struct fields for better readability of large struct definitions +align_struct_field = true diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 5d9632b1653..6b557960c89 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: - version: '1.8' + version: '1.9' show-versioninfo: true - uses: julia-actions/julia-buildpkg@v1 env: diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml new file mode 100644 index 00000000000..628d938dd76 --- /dev/null +++ b/.github/workflows/FormatCheck.yml @@ -0,0 +1,44 @@ +name: format-check + +on: + push: + branches: + - 'main' + tags: '*' + pull_request: + +jobs: + check-format: + runs-on: ${{ matrix.os }} + strategy: + matrix: + julia-version: [1] + julia-arch: [x86] + os: [ubuntu-latest] + steps: + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.julia-version }} + + - uses: actions/checkout@v3 + - name: Install JuliaFormatter and format + # This will use the latest version by default but you can set the version like so: + # + # julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter", version = "0.13.0"))' + # + # TODO: Change the call below to + # format(".") + run: | + julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter"))' + julia -e 'using JuliaFormatter; format(["benchmark", "ext", "src", "utils"])' + - name: Format check + run: | + julia -e ' + out = Cmd(`git diff --name-only`) |> read |> String + if out == "" + exit(0) + else + @error "Some files have not been formatted !!!" 
+ write(stdout, out) + exit(1) + end' diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index 09612788f59..c4ab3a98557 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.14.9 + uses: crate-ci/typos@v1.14.12 diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 01a6c99e843..c5c95558c8c 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -11,7 +11,7 @@ jobs: os: - ubuntu-latest version: - - '1.8' + - '1.9' arch: - x64 steps: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32420c458ba..b0a2c93db3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,8 @@ jobs: fail-fast: false matrix: version: - - '1.8' + - '1.9' + # - '~1.9.0-0' # including development versions # - 'nightly' os: - ubuntu-latest @@ -79,18 +80,22 @@ jobs: - threaded include: - version: '1.8' + os: ubuntu-latest + arch: x64 + trixi_test: threaded_legacy + - version: '1.9' os: macOS-latest arch: x64 trixi_test: mpi - - version: '1.8' + - version: '1.9' os: macOS-latest arch: x64 trixi_test: threaded - - version: '1.8' + - version: '1.9' os: windows-latest arch: x64 trixi_test: mpi - - version: '1.8' + - version: '1.9' os: windows-latest arch: x64 trixi_test: threaded @@ -121,7 +126,7 @@ jobs: TRIXI_TEST: ${{ matrix.trixi_test }} - uses: julia-actions/julia-processcoverage@v1 with: - directories: src,examples + directories: src,examples,ext - uses: codecov/codecov-action@v3 with: file: ./lcov.info diff --git a/Project.toml b/Project.toml index cb27c828cf4..9d51e4dcffc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.25-pre" +version = "0.5.29-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" @@ -42,6 +42,12 @@ Triangulate = "f7e6ffb2-c36d-4f8f-a77e-16e897189344" TriplotBase = "981d1d27-644d-49a2-9326-4793e63143c3" TriplotRecipes = "808ab39a-a642-4abf-81ff-4cb34ebbffa3" +[weakdeps] +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + +[extensions] +TrixiMakieExt = "Makie" + [compat] CodeTracking = "1.0.5" ConstructionBase = "1.3" @@ -53,6 +59,7 @@ HDF5 = "0.14, 0.15, 0.16" IfElse = "0.1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.118" +Makie = "0.19" MPI = "0.20" MuladdMacro = "0.2.2" Octavian = "0.3.5" @@ -66,7 +73,7 @@ Requires = "1.1" SciMLBase = "1.90" Setfield = "0.8, 1" SimpleUnPack = "1.1" -StartUpDG = "0.16" +StartUpDG = "0.17" Static = "0.3, 0.4, 0.5, 0.6, 0.7, 0.8" StaticArrayInterface = "1.4" StaticArrays = "1" @@ -78,3 +85,6 @@ Triangulate = "2.0" TriplotBase = "0.1" TriplotRecipes = "0.1" julia = "1.8" + +[extras] +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" diff --git a/README.md b/README.md index 509435923a4..ccd70b6daf8 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ installation and postprocessing procedures. Its features include: ## Installation If you have not yet installed Julia, please [follow the instructions for your operating system](https://julialang.org/downloads/platform/). Trixi.jl works -with Julia v1.8. +with Julia v1.8 and newer. We recommend using the latest stable release of Julia. 
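A side note on the `[weakdeps]`/`[extensions]` entries added to `Project.toml` above: on Julia v1.9 and newer, the new `TrixiMakieExt` module is compiled and loaded automatically once both Trixi.jl and Makie are present in a session, while on Julia v1.8 the Requires.jl fallback in `ext/TrixiMakieExt.jl` is used. A minimal sketch of how this looks from the user side (`GLMakie` is just one possible Makie backend, chosen here for illustration):
```julia
using Trixi    # loads Trixi.jl without any Makie-related code
using GLMakie  # loading a Makie package triggers loading of the TrixiMakieExt extension

# On Julia v1.9+, the loaded extension module can be queried for introspection
Base.get_extension(Trixi, :TrixiMakieExt)  # returns the extension module once it is active
```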
### For users Trixi.jl and its related tools are registered Julia packages. Hence, you diff --git a/benchmark/benchmark_ec.jl b/benchmark/benchmark_ec.jl index f714d933b77..5bd20b41111 100644 --- a/benchmark/benchmark_ec.jl +++ b/benchmark/benchmark_ec.jl @@ -1,50 +1,50 @@ using Printf, BenchmarkTools, Trixi -function run_benchmarks(benchmark_run; levels=0:5, polydeg=3) - runtimes = zeros(length(levels)) - for (idx,initial_refinement_level) in enumerate(levels) - result = benchmark_run(; initial_refinement_level, polydeg) - display(result) - runtimes[idx] = result |> median |> time # in nanoseconds - end - return (; levels, runtimes, polydeg) +function run_benchmarks(benchmark_run; levels = 0:5, polydeg = 3) + runtimes = zeros(length(levels)) + for (idx, initial_refinement_level) in enumerate(levels) + result = benchmark_run(; initial_refinement_level, polydeg) + display(result) + runtimes[idx] = result |> median |> time # in nanoseconds + end + return (; levels, runtimes, polydeg) end function tabulate_benchmarks(args...; kwargs...) - result = run_benchmarks(args...; kwargs...) - println("#Elements | Runtime in seconds") - for (level,runtime) in zip(result.levels, result.runtimes) - @printf("%9d | %.2e\n", 4^level, 1.0e-9 * runtime) - end - for (level,runtime) in zip(result.levels, result.runtimes) - @printf("%.16e\n", 1.0e-9 * runtime) - end + result = run_benchmarks(args...; kwargs...) + println("#Elements | Runtime in seconds") + for (level, runtime) in zip(result.levels, result.runtimes) + @printf("%9d | %.2e\n", 4^level, 1.0e-9*runtime) + end + for (level, runtime) in zip(result.levels, result.runtimes) + @printf("%.16e\n", 1.0e-9*runtime) + end end -function benchmark_euler(; initial_refinement_level=1, polydeg=3) +function benchmark_euler(; initial_refinement_level = 1, polydeg = 3) + γ = 1.4 + equations = CompressibleEulerEquations2D(γ) - γ = 1.4 - equations = CompressibleEulerEquations2D(γ) + surface_flux = flux_ranocha + volume_flux = flux_ranocha + solver = DGSEM(polydeg, surface_flux, VolumeIntegralFluxDifferencing(volume_flux)) - surface_flux = flux_ranocha - volume_flux = flux_ranocha - solver = DGSEM(polydeg, surface_flux, VolumeIntegralFluxDifferencing(volume_flux)) + coordinates_min = (-2.0, -2.0) + coordinates_max = (2.0, 2.0) + mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = initial_refinement_level, + n_cells_max = 100_000) - coordinates_min = (-2.0, -2.0) - coordinates_max = ( 2.0, 2.0) - mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level=initial_refinement_level, - n_cells_max=100_000) + semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_weak_blast_wave, + solver) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_weak_blast_wave, solver) + t0 = 0.0 + u0 = compute_coefficients(t0, semi) + du = similar(u0) - t0 = 0.0 - u0 = compute_coefficients(t0, semi) - du = similar(u0) - - @benchmark Trixi.rhs!($du, $u0, $semi, $t0) + @benchmark Trixi.rhs!($du, $u0, $semi, $t0) end # versioninfo(verbose=true) @show Threads.nthreads() -tabulate_benchmarks(benchmark_euler, levels=0:8) +tabulate_benchmarks(benchmark_euler, levels = 0:8) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index edaeed63577..a3f7d1d2569 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -1,3 +1,6 @@ +# Disable formatting this file since it contains highly unusual formatting for better +# readability +#! 
format: off using BenchmarkTools using Trixi diff --git a/benchmark/elixir_2d_euler_vortex_p4est.jl b/benchmark/elixir_2d_euler_vortex_p4est.jl index 6c151842ebb..3ee97cc752f 100644 --- a/benchmark/elixir_2d_euler_vortex_p4est.jl +++ b/benchmark/elixir_2d_euler_vortex_p4est.jl @@ -17,43 +17,42 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. 
perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-10.0, -10.0) -coordinates_max = ( 10.0, 10.0) -mesh = P4estMesh((1, 1), polydeg=Trixi.polydeg(solver), - coordinates_min=coordinates_min, coordinates_max=coordinates_max, - initial_refinement_level=4) - +coordinates_max = (10.0, 10.0) +mesh = P4estMesh((1, 1), polydeg = Trixi.polydeg(solver), + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 4) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -66,19 +65,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_2d_euler_vortex_structured.jl b/benchmark/elixir_2d_euler_vortex_structured.jl index 344ac2d20c8..5627049c9e2 100644 --- a/benchmark/elixir_2d_euler_vortex_structured.jl +++ b/benchmark/elixir_2d_euler_vortex_structured.jl @@ -17,43 +17,42 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. 
perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-10.0, -10.0) -coordinates_max = ( 10.0, 10.0) +coordinates_max = (10.0, 10.0) cells_per_dimension = (16, 16) mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) ############################################################################### @@ -65,19 +64,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_2d_euler_vortex_tree.jl b/benchmark/elixir_2d_euler_vortex_tree.jl index b3873b4e01b..68e207c5344 100644 --- a/benchmark/elixir_2d_euler_vortex_tree.jl +++ b/benchmark/elixir_2d_euler_vortex_tree.jl @@ -17,43 +17,42 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. 
T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-10.0, -10.0) -coordinates_max = ( 10.0, 10.0) +coordinates_max = (10.0, 10.0) mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level=4, - n_cells_max=10_000) - + initial_refinement_level = 4, + n_cells_max = 10_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -66,19 +65,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_2d_euler_vortex_unstructured.jl 
b/benchmark/elixir_2d_euler_vortex_unstructured.jl index bd0ab4a8040..082b6648abf 100644 --- a/benchmark/elixir_2d_euler_vortex_unstructured.jl +++ b/benchmark/elixir_2d_euler_vortex_unstructured.jl @@ -18,42 +18,43 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. 
perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) default_mesh_file = joinpath(@__DIR__, "mesh_uniform_cartesian.mesh") -isfile(default_mesh_file) || download("https://gist.githubusercontent.com/ranocha/f4ea19ba3b62348968c971db43d7798b/raw/a506abb9479c020920cf6068c142670fc1a9aadc/mesh_uniform_cartesian.mesh", default_mesh_file) +isfile(default_mesh_file) || + download("https://gist.githubusercontent.com/ranocha/f4ea19ba3b62348968c971db43d7798b/raw/a506abb9479c020920cf6068c142670fc1a9aadc/mesh_uniform_cartesian.mesh", + default_mesh_file) mesh_file = default_mesh_file -mesh = UnstructuredMesh2D(mesh_file, periodicity=true) - +mesh = UnstructuredMesh2D(mesh_file, periodicity = true) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -66,19 +67,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_3d_euler_source_terms_structured.jl b/benchmark/elixir_3d_euler_source_terms_structured.jl index 9a284653f6f..b44eb0caa7c 100644 --- a/benchmark/elixir_3d_euler_source_terms_structured.jl +++ b/benchmark/elixir_3d_euler_source_terms_structured.jl @@ -9,17 +9,15 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0, 0.0) coordinates_max = (2.0, 2.0, 2.0) cells_per_dimension = (4, 4, 4) mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, - source_terms=source_terms_convergence_test) - + source_terms = source_terms_convergence_test) ############################################################################### # ODE solvers, callbacks etc. 
@@ -30,16 +28,15 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_3d_euler_source_terms_tree.jl b/benchmark/elixir_3d_euler_source_terms_tree.jl index 6a182da91e6..369b9359580 100644 --- a/benchmark/elixir_3d_euler_source_terms_tree.jl +++ b/benchmark/elixir_3d_euler_source_terms_tree.jl @@ -9,18 +9,16 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0, 0.0) coordinates_max = (2.0, 2.0, 2.0) mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level=2, - n_cells_max=10_000) - + initial_refinement_level = 2, + n_cells_max = 10_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, - source_terms=source_terms_convergence_test) - + source_terms = source_terms_convergence_test) ############################################################################### # ODE solvers, callbacks etc. @@ -31,16 +29,15 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl b/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl index e270941cbe9..e6dd0d47448 100644 --- a/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl +++ b/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl @@ -1,3 +1,7 @@ +# Disable formatting this file since it contains highly unusual formatting for better +# readability +#! 
format: off + import Pkg; Pkg.activate(@__DIR__); Pkg.instantiate() using BenchmarkTools diff --git a/benchmark/run_benchmarks.jl b/benchmark/run_benchmarks.jl index ea7aae3c8d4..3a92a9ba700 100644 --- a/benchmark/run_benchmarks.jl +++ b/benchmark/run_benchmarks.jl @@ -3,18 +3,17 @@ using PkgBenchmark using Trixi let results = judge(Trixi, - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=1`), # target - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=1`, id="main") # baseline - ) - - export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads1.md"), results) + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=1`), # target + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=1`, + id = "main")) + export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads1.md"), + results) end - let results = judge(Trixi, - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=2`), # target - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=2`, id="main") # baseline - ) - - export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads2.md"), results) + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=2`), # target + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=2`, + id = "main")) + export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads2.md"), + results) end diff --git a/docs/src/development.md b/docs/src/development.md index e6a24f0cf06..cead713d0d1 100644 --- a/docs/src/development.md +++ b/docs/src/development.md @@ -18,7 +18,7 @@ package, which tracks changed files and re-loads them automatically. Therefore, it is *highly recommended* to first install Revise with the following command in Julia: To enter the package REPL mode, press `]` in the standard Julia REPL mode. Then, execute ```julia-repl -(@v1.8) pkg> add Revise +(@v1.9) pkg> add Revise ``` Now you are able to run Trixi.jl from the REPL, change Trixi.jl code between runs, **and** enjoy the advantages of the compilation cache! Before you start using @@ -28,7 +28,7 @@ Another recommended package for working from the REPL is [OhMyREPL.jl](https://github.com/KristofferC/OhMyREPL.jl). It can be installed by running ```julia-repl -(@v1.8) pkg> add OhMyREPL +(@v1.9) pkg> add OhMyREPL ``` and adds syntax highlighting, bracket highlighting, and other helpful improvements for using Julia interactively. To automatically use OhMyREPL when @@ -244,7 +244,7 @@ see the call stack, and execute statements. The package can be installed in the Julia REPL by executing ```julia-repl -(@v1.8) pkg> add Infiltrator +(@v1.9) pkg> add Infiltrator ``` To load the package in the Julia REPL execute @@ -328,5 +328,5 @@ in Trixi2Vtk. To use a locally modified Trixi.jl clone instead of a Trixi.jl release, one can tell Pkg to use the local source code of Trixi.jl instead of a registered version by running ```julia-repl -(@v1.8) pkg> develop path/to/Trixi.jl +(@v1.9) pkg> develop path/to/Trixi.jl ``` diff --git a/docs/src/index.md b/docs/src/index.md index 1ee05860b67..3af785bc681 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -58,7 +58,7 @@ installation and postprocessing procedures. Its features include: ## Installation If you have not yet installed Julia, please [follow the instructions for your operating system](https://julialang.org/downloads/platform/). Trixi.jl works -with Julia v1.8. 
+with Julia v1.8 and newer. We recommend using the latest stable release of Julia. ### For users Trixi.jl and its related tools are registered Julia packages. Hence, you diff --git a/docs/src/styleguide.md b/docs/src/styleguide.md index a6fc9585ec4..de367c086cc 100644 --- a/docs/src/styleguide.md +++ b/docs/src/styleguide.md @@ -1,10 +1,20 @@ # Style guide -The following lists a few coding conventions for Trixi.jl: +Coding style is an inherently personal - and thus hotly contested - issue. Since code is +usually "written once, read often", it helps regular developers, new users, and reviewers if +code is formatted consistently. We therefore believe in the merit of using a common coding +style throughout Trixi.jl, even at the expense that not everyone can be happy with every +detailed style decision. If you came here because you are furious about our code formatting +rules, here is a happy little whale for you to calm you down: 🐳 + +## Conventions +The following lists a few coding conventions for Trixi.jl. Note that in addition to these +conventions, we apply and enforce automated source code formatting +(see [below](@ref automated-source-code-formatting) for more details): * Modules, types, structs with `CamelCase`. * Functions, variables with lowercase `snake_case`. - * Indentation with 2 spaces (*never* tabs!), line continuations indented with 4 spaces. - * Maximum line length (strictly): **100**. + * Indentation with 4 spaces (*never* tabs!). + * Maximum line length (strictly): **92**. * Functions that mutate their *input* are named with a trailing `!`. * Functions order their parameters [similar to Julia Base](https://docs.julialang.org/en/v1/manual/style-guide/#Write-functions-with-argument-ordering-similar-to-Julia-Base-1). * The main modified argument comes first. For example, if the right-hand side `du` is modified, @@ -24,12 +34,34 @@ The following lists a few coding conventions for Trixi.jl: instead of `central_flux`. This helps when searching for available functions on the REPL (e.g., when trying to find all flux functions). -Based on that, and personal experience, a formatting tool with a few helpful -options is included in `utils/julia-format.jl`. Note, however, that this tool is -not yet optimal, as it re-indents too greedily. +## [Automated source code formatting](@id automated-source-code-formatting) +We use [JuliaFormatter.jl](https://github.com/domluna/JuliaFormatter.jl) to format the +source code of Trixi.jl, which will also enforce *some* of the [Conventions](@ref) listed +above (e.g., line length or indentation with 4 spaces are automatically handled, while +capitalization of names is not). Our format is mostly based on the +[SciML](https://domluna.github.io/JuliaFormatter.jl/stable/sciml_style/)-style formatting +rules. For more details, you can have a look at the current +[`.JuliaFormatter.toml`](https://github.com/trixi-framework/Trixi.jl/blob/main/.JuliaFormatter.toml) +file that holds the configuration options we use for JuliaFormatter.jl. -This is a list of handy style guides that are mostly consistent with each -other and this guide, and which have been used as a basis: +Note that we expect all contributions to Trixi.jl to be formatted with JuliaFormatter.jl +before being merged to the `main` branch. We ensure this by running an automated check on all +PRs that verifies that running JuliaFormatter.jl again will not change the source code.
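For orientation, this check amounts to running the formatter over the covered directories and then testing whether the working tree changed; a rough Julia sketch of the `FormatCheck.yml` workflow added above (assuming JuliaFormatter.jl is installed and this is run from the repository root):
```julia
# Rough local equivalent of the format-check CI job defined in FormatCheck.yml above
using JuliaFormatter
format(["benchmark", "ext", "src", "utils"])  # same directories as in the workflow
run(`git diff --name-only`)  # any file printed here would make the CI check fail
```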
- * [https://www.juliaopt.org/JuMP.jl/stable/style/](https://www.juliaopt.org/JuMP.jl/stable/style/) - * [https://github.com/jrevels/YASGuide](https://github.com/jrevels/YASGuide) +To format your contributions before creating a PR (or, at least, before requesting a review +of your PR), you need to install JuliaFormatter.jl first by running +```shell +julia -e 'using Pkg; Pkg.add("JuliaFormatter")' +``` +You can then recursively format all Julia files in the Trixi.jl repo by executing +```shell +julia -e 'using JuliaFormatter; format(".")' +``` +from inside the Trixi.jl repository. For convenience, there is also a script you can +directly run from your terminal shell, which will automatically install JuliaFormatter.jl in a +temporary environment and then run it: +```shell +utils/trixi-format.jl +``` +You can get more information about using the convenience script by running it with the +`--help`/`-h` flag. diff --git a/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic.jl b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic.jl new file mode 100644 index 00000000000..1cd075e84ea --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic.jl @@ -0,0 +1,83 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection-diffusion equation + +diffusivity() = 5.0e-2 +advection_velocity = (1.0, 0.0) +equations = LinearScalarAdvectionEquation2D(advection_velocity) +equations_parabolic = LaplaceDiffusion2D(diffusivity(), equations) + +function x_trans_periodic(x, domain_length=SVector(2 * pi), center=SVector(0.0)) + x_normalized = x .- center + x_shifted = x_normalized .% domain_length + x_offset = ((x_shifted .< -0.5 * domain_length) - (x_shifted .> 0.5 * domain_length)) .* domain_length + return center + x_shifted + x_offset +end + +# Define initial condition (copied from "examples/tree_1d_dgsem/elixir_advection_diffusion.jl") +function initial_condition_diffusive_convergence_test(x, t, equation::LinearScalarAdvectionEquation2D) + # Store translated coordinate for easy use of exact solution + # Assumes that advection_velocity[2] = 0 (effectively that we are solving a 1D equation) + x_trans = x_trans_periodic(x[1] - equation.advection_velocity[1] * t) + + nu = diffusivity() + c = 0.0 + A = 1.0 + omega = 1.0 + scalar = c + A * sin(omega * sum(x_trans)) * exp(-nu * omega^2 * t) + return SVector(scalar) +end +initial_condition = initial_condition_diffusive_convergence_test + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) + +coordinates_min = (-pi, -pi) # minimum coordinates (min(x), min(y)) +coordinates_max = ( pi, pi) # maximum coordinates (max(x), max(y)) + +trees_per_dimension = (4, 4) +mesh = P4estMesh(trees_per_dimension, + polydeg=3, initial_refinement_level=2, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + periodicity=true) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolicParabolic(mesh, + (equations, equations_parabolic), + initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc.
+ +# Create ODE problem with time span `tspan` +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +# The AliveCallback prints short status information in regular intervals +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +time_int_tol = 1.0e-11 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) + +# Print the timer summary +summary_callback() diff --git a/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl new file mode 100644 index 00000000000..b438fb8a29c --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl @@ -0,0 +1,88 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection-diffusion equation + +diffusivity() = 5.0e-2 +advection_velocity = (1.0, 0.0) +equations = LinearScalarAdvectionEquation2D(advection_velocity) +equations_parabolic = LaplaceDiffusion2D(diffusivity(), equations) + +function x_trans_periodic(x, domain_length=SVector(2 * pi), center=SVector(0.0)) + x_normalized = x .- center + x_shifted = x_normalized .% domain_length + x_offset = ((x_shifted .< -0.5 * domain_length) - (x_shifted .> 0.5 * domain_length)) .* domain_length + return center + x_shifted + x_offset +end + +# Define initial condition (copied from "examples/tree_1d_dgsem/elixir_advection_diffusion.jl") +function initial_condition_diffusive_convergence_test(x, t, equation::LinearScalarAdvectionEquation2D) + # Store translated coordinate for easy use of exact solution + # Assumes that advection_velocity[2] = 0 (effectively that we are solving a 1D equation) + x_trans = x_trans_periodic(x[1] - equation.advection_velocity[1] * t) + + nu = diffusivity() + c = 0.0 + A = 1.0 + omega = 1.0 + scalar = c + A * sin(omega * sum(x_trans)) * exp(-nu * omega^2 * t) + return SVector(scalar) +end +initial_condition = initial_condition_diffusive_convergence_test + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) + +# This maps the domain [-1, 1]^2 to [-pi, pi]^2 while also +# introducing a curved warping to interior nodes. 
+function mapping(xi, eta) + x = xi + 0.1 * sin(pi * xi) * sin(pi * eta) + y = eta + 0.1 * sin(pi * xi) * sin(pi * eta) + return pi * SVector(x, y) +end + +trees_per_dimension = (4, 4) +mesh = P4estMesh(trees_per_dimension, + polydeg=3, initial_refinement_level=2, + mapping=mapping, + periodicity=true) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolicParabolic(mesh, + (equations, equations_parabolic), + initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span `tspan` +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +# The AliveCallback prints short status information in regular intervals +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +time_int_tol = 1.0e-11 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) + +# Print the timer summary +summary_callback() diff --git a/examples/tree_1d_dgsem/elixir_advection_finite_volume.jl b/examples/tree_1d_dgsem/elixir_advection_finite_volume.jl new file mode 100644 index 00000000000..28518e7276a --- /dev/null +++ b/examples/tree_1d_dgsem/elixir_advection_finite_volume.jl @@ -0,0 +1,56 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = 1.0 +equations = LinearScalarAdvectionEquation1D(advection_velocity) + +# Create DG solver with polynomial degree = 0, i.e., a first order finite volume solver, +# with (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=0, surface_flux=flux_lax_friedrichs) + +coordinates_min = -1.0 # minimum coordinate +coordinates_max = 1.0 # maximum coordinate + +# Create a uniformly refined mesh with periodic boundaries +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=5, + n_cells_max=30_000) # set maximum capacity of tree data structure + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver) + + +############################################################################### +# ODE solvers, callbacks etc. 
+ +# Create ODE problem with time span from 0.0 to 1.0 +ode = semidiscretize(semi, (0.0, 1.0)); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval=100) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl=0.9) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, stepsize_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, Euler(), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Print the timer summary +summary_callback() diff --git a/ext/TrixiMakieExt.jl b/ext/TrixiMakieExt.jl new file mode 100644 index 00000000000..1eb11f6a422 --- /dev/null +++ b/ext/TrixiMakieExt.jl @@ -0,0 +1,426 @@ +# Package extension for adding Makie-based features to Trixi.jl +module TrixiMakieExt + +# Required for visualization code +if isdefined(Base, :get_extension) + using Makie: Makie, GeometryBasics +else + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + using ..Makie: Makie, GeometryBasics +end + +# Use all exported symbols to avoid having to rewrite `recipes_makie.jl` +using Trixi + +# Use additional symbols that are not exported +using Trixi: PlotData2DTriangulated, TrixiODESolution, PlotDataSeries, ScalarData, @muladd, + wrap_array_native, mesh_equations_solver_cache + +# Import functions such that they can be extended with new methods +import Trixi: iplot, iplot! + +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# First some utilities +# Given a reference plotting triangulation, this function generates a plotting triangulation for +# the entire global mesh. The output can be plotted using `Makie.mesh`. +function global_plotting_triangulation_makie(pds::PlotDataSeries{ + <:PlotData2DTriangulated + }; + set_z_coordinate_zero = false) + @unpack variable_id = pds + pd = pds.plot_data + @unpack x, y, data, t = pd + + makie_triangles = Makie.to_triangles(t) + + # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. 
+ # Note: Float32 is required by GeometryBasics + num_plotting_nodes, num_elements = size(x) + trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) + coordinates = zeros(Float32, num_plotting_nodes, 3) + for element in Base.OneTo(num_elements) + for i in Base.OneTo(num_plotting_nodes) + coordinates[i, 1] = x[i, element] + coordinates[i, 2] = y[i, element] + if set_z_coordinate_zero == false + coordinates[i, 3] = data[i, element][variable_id] + end + end + trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), + makie_triangles) + end + plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh + return plotting_mesh +end + +# Returns a list of `Makie.Point`s which can be used to plot the mesh, or a solution "wireframe" +# (e.g., a plot of the mesh lines but with the z-coordinate equal to the value of the solution). +function convert_PlotData2D_to_mesh_Points(pds::PlotDataSeries{<:PlotData2DTriangulated + }; + set_z_coordinate_zero = false) + @unpack variable_id = pds + pd = pds.plot_data + @unpack x_face, y_face, face_data = pd + + if set_z_coordinate_zero + # plot 2d surface by setting z coordinate to zero. + # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. + sol_f = zeros(eltype(first(x_face)), size(x_face)) + else + sol_f = StructArrays.component(face_data, variable_id) + end + + # This line separates solution lines on each edge by NaNs to ensure that they are rendered + # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix + # whose columns correspond to different elements. We add NaN separators by appending a row of + # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up + # plotting. + xyz_wireframe = GeometryBasics.Point.(map(x -> vec(vcat(x, + fill(NaN, 1, size(x, 2)))), + (x_face, y_face, sol_f))...) + + return xyz_wireframe +end + +# Creates a GeometryBasics triangulation for the visualization of a ScalarData2D plot object. +function global_plotting_triangulation_makie(pd::PlotData2DTriangulated{<:ScalarData}; + set_z_coordinate_zero = false) + @unpack x, y, data, t = pd + + makie_triangles = Makie.to_triangles(t) + + # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. + # Note: Float32 is required by GeometryBasics + num_plotting_nodes, num_elements = size(x) + trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) + coordinates = zeros(Float32, num_plotting_nodes, 3) + for element in Base.OneTo(num_elements) + for i in Base.OneTo(num_plotting_nodes) + coordinates[i, 1] = x[i, element] + coordinates[i, 2] = y[i, element] + if set_z_coordinate_zero == false + coordinates[i, 3] = data.data[i, element] + end + end + trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), + makie_triangles) + end + plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh + return plotting_mesh +end + +# Returns a list of `GeometryBasics.Point`s which can be used to plot the mesh, or a solution "wireframe" +# (e.g., a plot of the mesh lines but with the z-coordinate equal to the value of the solution). +function convert_PlotData2D_to_mesh_Points(pd::PlotData2DTriangulated{<:ScalarData}; + set_z_coordinate_zero = false) + @unpack x_face, y_face, face_data = pd + + if set_z_coordinate_zero + # plot 2d surface by setting z coordinate to zero. 
+ # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. + sol_f = zeros(eltype(first(x_face)), size(x_face)) + else + sol_f = face_data + end + + # This line separates solution lines on each edge by NaNs to ensure that they are rendered + # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix + # whose columns correspond to different elements. We add NaN separators by appending a row of + # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up + # plotting. + xyz_wireframe = GeometryBasics.Point.(map(x -> vec(vcat(x, + fill(NaN, 1, size(x, 2)))), + (x_face, y_face, sol_f))...) + + return xyz_wireframe +end + +# We set the Makie default colormap to match Plots.jl, which uses `:inferno` by default. +default_Makie_colormap() = :inferno + +# convenience struct for editing Makie plots after they're created. +struct FigureAndAxes{Axes} + fig::Makie.Figure + axes::Axes +end + +# for "quiet" return arguments to Makie.plot(::TrixiODESolution) and +# Makie.plot(::PlotData2DTriangulated) +Base.show(io::IO, fa::FigureAndAxes) = nothing + +# allows for returning fig, axes = Makie.plot(...) +function Base.iterate(fa::FigureAndAxes, state = 1) + if state == 1 + return (fa.fig, 2) + elseif state == 2 + return (fa.axes, 3) + else + return nothing + end +end + +""" + iplot(u, mesh::UnstructuredMesh2D, equations, solver, cache; + plot_mesh=true, show_axis=false, colormap=default_Makie_colormap(), + variable_to_plot_in=1) + +Creates an interactive surface plot of the solution and mesh for an `UnstructuredMesh2D` type. + +Keywords: +- variable_to_plot_in: variable to show by default + +!!! warning "Experimental implementation" + This is an experimental feature and may change in future releases. +""" +function iplot end + +# Enables `iplot(PlotData2D(sol))`. +function iplot(pd::PlotData2DTriangulated; + plot_mesh = true, show_axis = false, colormap = default_Makie_colormap(), + variable_to_plot_in = 1) + @unpack variable_names = pd + + # Initialize a Makie figure that we'll add the solution and toggle switches to. + fig = Makie.Figure() + + # Set up options for the drop-down menu + menu_options = [zip(variable_names, 1:length(variable_names))...] + menu = Makie.Menu(fig, options = menu_options) + + # Initialize toggle switches for viewing the mesh + toggle_solution_mesh = Makie.Toggle(fig, active = plot_mesh) + toggle_mesh = Makie.Toggle(fig, active = plot_mesh) + + # Add dropdown menu and toggle switches to the left side of the figure. + fig[1, 1] = Makie.vgrid!(Makie.Label(fig, "Solution field", width = nothing), menu, + Makie.Label(fig, "Solution mesh visible"), + toggle_solution_mesh, + Makie.Label(fig, "Mesh visible"), toggle_mesh; + tellheight = false, width = 200) + + # Create a zoomable interactive axis object on top of which to plot the solution. + ax = Makie.LScene(fig[1, 2], scenekw = (show_axis = show_axis,)) + + # Initialize the dropdown menu to `variable_to_plot_in` + # Since menu.selection is an Observable type, we need to dereference it using `[]` to set. + menu.selection[] = variable_to_plot_in + menu.i_selected[] = variable_to_plot_in + + # Since `variable_to_plot` is an Observable, these lines are re-run whenever `variable_to_plot[]` + # is updated from the drop-down menu. + plotting_mesh = Makie.@lift(global_plotting_triangulation_makie(getindex(pd, + variable_names[$(menu.selection)]))) + solution_z = Makie.@lift(getindex.($plotting_mesh.position, 3)) + + # Plot the actual solution. 
+ Makie.mesh!(ax, plotting_mesh; color = solution_z, colormap) + + # Create a mesh overlay by plotting a mesh both on top of and below the solution contours. + wire_points = Makie.@lift(convert_PlotData2D_to_mesh_Points(getindex(pd, + variable_names[$(menu.selection)]))) + wire_mesh_top = Makie.lines!(ax, wire_points, color = :white) + wire_mesh_bottom = Makie.lines!(ax, wire_points, color = :white) + Makie.translate!(wire_mesh_top, 0, 0, 1e-3) + Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) + + # This draws flat mesh lines below the solution. + function compute_z_offset(solution_z) + zmin = minimum(solution_z) + zrange = (x -> x[2] - x[1])(extrema(solution_z)) + return zmin - 0.25 * zrange + end + z_offset = Makie.@lift(compute_z_offset($solution_z)) + function get_flat_points(wire_points, z_offset) + [Makie.Point(point.data[1:2]..., z_offset) for point in wire_points] + end + flat_wire_points = Makie.@lift get_flat_points($wire_points, $z_offset) + wire_mesh_flat = Makie.lines!(ax, flat_wire_points, color = :black) + + # create a small variation in the extrema to avoid the Makie `range_step` cannot be zero error. + # see https://github.com/MakieOrg/Makie.jl/issues/931 for more details. + # the colorbar range is perturbed by 1e-5 * the magnitude of the solution. + function scaled_extrema(x) + ex = extrema(x) + if ex[2] ≈ ex[1] # if solution is close to constant, perturb colorbar + return ex .+ 1e-5 .* maximum(abs.(ex)) .* (-1, 1) + else + return ex + end + end + + # Resets the colorbar each time the solution changes. + Makie.Colorbar(fig[1, 3], limits = Makie.@lift(scaled_extrema($solution_z)), + colormap = colormap) + + # This syncs the toggle buttons to the mesh plots. + Makie.connect!(wire_mesh_top.visible, toggle_solution_mesh.active) + Makie.connect!(wire_mesh_bottom.visible, toggle_solution_mesh.active) + Makie.connect!(wire_mesh_flat.visible, toggle_mesh.active) + + # On OSX, shift-command-4 for screenshots triggers a constant "up-zoom". + # To avoid this, we remap up-zoom to the right shift button instead. + Makie.cameracontrols(ax.scene).attributes[:up_key][] = Makie.Keyboard.right_shift + + # typing this pulls up the figure (similar to display(plot!()) in Plots.jl) + fig +end + +function iplot(u, mesh, equations, solver, cache; + solution_variables = nothing, nvisnodes = 2 * nnodes(solver), kwargs...) + @assert ndims(mesh) == 2 + + pd = PlotData2DTriangulated(u, mesh, equations, solver, cache; + solution_variables = solution_variables, + nvisnodes = nvisnodes) + + iplot(pd; kwargs...) +end + +# redirect `iplot(sol)` to dispatchable `iplot` signature. +iplot(sol::TrixiODESolution; kwargs...) = iplot(sol.u[end], sol.prob.p; kwargs...) +function iplot(u, semi; kwargs...) + iplot(wrap_array_native(u, semi), mesh_equations_solver_cache(semi)...; kwargs...) +end + +# Interactive visualization of user-defined ScalarData. +function iplot(pd::PlotData2DTriangulated{<:ScalarData}; + show_axis = false, colormap = default_Makie_colormap(), + plot_mesh = false) + fig = Makie.Figure() + + # Create a zoomable interactive axis object on top of which to plot the solution. 
+    ax = Makie.LScene(fig[1, 1], scenekw = (show_axis = show_axis,)) + + # Plot the user-defined ScalarData + fig_axis_plt = iplot!(FigureAndAxes(fig, ax), pd; colormap = colormap, + plot_mesh = plot_mesh) + + return fig_axis_plt +end + +function iplot!(fig_axis::Union{FigureAndAxes, Makie.FigureAxisPlot}, + pd::PlotData2DTriangulated{<:ScalarData}; + colormap = default_Makie_colormap(), plot_mesh = false) + + # Destructure the first two fields of either FigureAndAxes or Makie.FigureAxisPlot + fig, ax = fig_axis + + # Create a triangulation of the scalar data to plot + plotting_mesh = global_plotting_triangulation_makie(pd) + solution_z = getindex.(plotting_mesh.position, 3) + plt = Makie.mesh!(ax, plotting_mesh; color = solution_z, colormap) + + if plot_mesh + wire_points = convert_PlotData2D_to_mesh_Points(pd) + wire_mesh_top = Makie.lines!(ax, wire_points, color = :white) + wire_mesh_bottom = Makie.lines!(ax, wire_points, color = :white) + Makie.translate!(wire_mesh_top, 0, 0, 1e-3) + Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) + end + + # Add a colorbar to the rightmost part of the layout + Makie.Colorbar(fig[1, end + 1], plt) + + return Makie.FigureAxisPlot(fig, ax, plt) +end + +# ================== new Makie plot recipes ==================== + +# This initializes a Makie recipe, which creates a new type definition which Makie uses to create +# custom `trixiheatmap` plots. See also https://makie.juliaplots.org/stable/recipes.html +Makie.@recipe(TrixiHeatmap, plot_data_series) do scene + Makie.Theme(colormap = default_Makie_colormap()) +end + +function Makie.plot!(myplot::TrixiHeatmap) + pds = myplot[:plot_data_series][] + + plotting_mesh = global_plotting_triangulation_makie(pds; + set_z_coordinate_zero = true) + + @unpack variable_id = pds + pd = pds.plot_data + solution_z = vec(StructArrays.component(pd.data, variable_id)) + Makie.mesh!(myplot, plotting_mesh, color = solution_z, shading = false, + colormap = myplot[:colormap]) + myplot.colorrange = extrema(solution_z) + + # Makie hides keyword arguments within `myplot`; see also + # https://github.com/JuliaPlots/Makie.jl/issues/837#issuecomment-845985070 + plot_mesh = if haskey(myplot, :plot_mesh) + myplot.plot_mesh[] + else + true # default to plotting the mesh + end + + if plot_mesh + xyz_wireframe = convert_PlotData2D_to_mesh_Points(pds; + set_z_coordinate_zero = true) + Makie.lines!(myplot, xyz_wireframe, color = :lightgrey) + end + + myplot +end + +# Redirects Makie.plot(pd::PlotDataSeries) to the custom recipe TrixiHeatmap(pd) +Makie.plottype(::Trixi.PlotDataSeries{<:Trixi.PlotData2DTriangulated}) = TrixiHeatmap + +# Makie does not yet support layouts in its plot recipes, so we overload `Makie.plot` directly. +function Makie.plot(sol::TrixiODESolution; + plot_mesh = false, solution_variables = nothing, + colormap = default_Makie_colormap()) + return Makie.plot(PlotData2DTriangulated(sol; solution_variables); plot_mesh, + colormap) +end + +function Makie.plot(pd::PlotData2DTriangulated, fig = Makie.Figure(); + plot_mesh = false, colormap = default_Makie_colormap()) + figAxes = Makie.plot!(fig, pd; plot_mesh, colormap) + display(figAxes.fig) + return figAxes +end + +function Makie.plot!(fig, pd::PlotData2DTriangulated; + plot_mesh = false, colormap = default_Makie_colormap()) + # Create a layout that is as square as possible when there are more than 3 subplots. + # If a square layout is not possible, prefer more columns than rows.
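+    # For example, 4 variables give `cols = ceil(Int, sqrt(4)) = 2` and `rows = cld(4, 2) = 2`, + # while 5 variables give `cols = 3` and `rows = 2`.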
+ if length(pd) <= 3 + cols = length(pd) + rows = 1 + else + cols = ceil(Int, sqrt(length(pd))) + rows = cld(length(pd), cols) + end + + axes = [Makie.Axis(fig[i, j], xlabel = "x", ylabel = "y") + for j in 1:rows, i in 1:cols] + row_list, col_list = ([i for j in 1:rows, i in 1:cols], + [j for j in 1:rows, i in 1:cols]) + + for (variable_to_plot, (variable_name, pds)) in enumerate(pd) + ax = axes[variable_to_plot] + plt = trixiheatmap!(ax, pds; plot_mesh, colormap) + + row = row_list[variable_to_plot] + col = col_list[variable_to_plot] + Makie.Colorbar(fig[row, col][1, 2], plt) + + ax.aspect = Makie.DataAspect() # equal aspect ratio + ax.title = variable_name + Makie.xlims!(ax, extrema(pd.x)) + Makie.ylims!(ax, extrema(pd.y)) + end + + return FigureAndAxes(fig, axes) +end +end # @muladd + +end diff --git a/src/Trixi.jl b/src/Trixi.jl index a8a2c0ad128..d5579aeea33 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -18,9 +18,11 @@ module Trixi # Include other packages that are used in Trixi.jl # (standard library packages first, other packages next, all of them sorted alphabetically) -using LinearAlgebra: LinearAlgebra, Diagonal, diag, dot, mul!, norm, cross, normalize, I, UniformScaling, det +using LinearAlgebra: LinearAlgebra, Diagonal, diag, dot, mul!, norm, cross, normalize, I, + UniformScaling, det using Printf: @printf, @sprintf, println -using SparseArrays: AbstractSparseMatrix, AbstractSparseMatrixCSC, sparse, droptol!, rowvals, nzrange, nonzeros, spzeros +using SparseArrays: AbstractSparseMatrix, AbstractSparseMatrixCSC, sparse, droptol!, + rowvals, nzrange, nonzeros, spzeros # import @reexport now to make it available for further imports/exports using Reexport: @reexport @@ -70,14 +72,15 @@ using SimpleUnPack: @pack! # finite difference SBP operators using SummationByPartsOperators: AbstractDerivativeOperator, - AbstractNonperiodicDerivativeOperator, DerivativeOperator, - AbstractPeriodicDerivativeOperator, PeriodicDerivativeOperator, grid + AbstractNonperiodicDerivativeOperator, DerivativeOperator, + AbstractPeriodicDerivativeOperator, + PeriodicDerivativeOperator, grid import SummationByPartsOperators: integrate, semidiscretize, compute_coefficients, compute_coefficients!, left_boundary_weight, right_boundary_weight -@reexport using SummationByPartsOperators: - SummationByPartsOperators, derivative_operator, periodic_derivative_operator, - upwind_operators +@reexport using SummationByPartsOperators: SummationByPartsOperators, derivative_operator, + periodic_derivative_operator, + upwind_operators # DGMulti solvers @reexport using StartUpDG: StartUpDG, Polynomial, Gauss, SBP, Line, Tri, Quad, Hex, Tet @@ -95,7 +98,6 @@ using StartUpDG: RefElemData, MeshData, AbstractElemShape # include(expr -> quote @muladd begin $expr end end, filename) # end - # Define the entry points of our type hierarchy, e.g. # AbstractEquations, AbstractSemidiscretization etc. 
# Placing them here allows us to make use of them for dispatch even for @@ -129,12 +131,16 @@ include("visualization/visualization.jl") # export types/functions that define the public API of Trixi.jl export AcousticPerturbationEquations2D, - CompressibleEulerEquations1D, CompressibleEulerEquations2D, CompressibleEulerEquations3D, - CompressibleEulerMulticomponentEquations1D, CompressibleEulerMulticomponentEquations2D, + CompressibleEulerEquations1D, CompressibleEulerEquations2D, + CompressibleEulerEquations3D, + CompressibleEulerMulticomponentEquations1D, + CompressibleEulerMulticomponentEquations2D, IdealGlmMhdEquations1D, IdealGlmMhdEquations2D, IdealGlmMhdEquations3D, IdealGlmMhdMulticomponentEquations1D, IdealGlmMhdMulticomponentEquations2D, - HyperbolicDiffusionEquations1D, HyperbolicDiffusionEquations2D, HyperbolicDiffusionEquations3D, - LinearScalarAdvectionEquation1D, LinearScalarAdvectionEquation2D, LinearScalarAdvectionEquation3D, + HyperbolicDiffusionEquations1D, HyperbolicDiffusionEquations2D, + HyperbolicDiffusionEquations3D, + LinearScalarAdvectionEquation1D, LinearScalarAdvectionEquation2D, + LinearScalarAdvectionEquation3D, InviscidBurgersEquation1D, LatticeBoltzmannEquations2D, LatticeBoltzmannEquations3D, ShallowWaterEquations1D, ShallowWaterEquations2D, @@ -146,7 +152,8 @@ export LaplaceDiffusion1D, LaplaceDiffusion2D, export GradientVariablesPrimitive, GradientVariablesEntropy -export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle, flux_godunov, +export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle, + flux_godunov, flux_chandrashekar, flux_ranocha, flux_derigs_etal, flux_hindenlang_gassner, flux_nonconservative_powell, flux_kennedy_gruber, flux_shima_etal, flux_ec, @@ -181,13 +188,17 @@ export boundary_condition_do_nothing, export initial_condition_convergence_test, source_terms_convergence_test export source_terms_harmonic -export initial_condition_poisson_nonperiodic, source_terms_poisson_nonperiodic, boundary_condition_poisson_nonperiodic -export initial_condition_eoc_test_coupled_euler_gravity, source_terms_eoc_test_coupled_euler_gravity, source_terms_eoc_test_euler +export initial_condition_poisson_nonperiodic, source_terms_poisson_nonperiodic, + boundary_condition_poisson_nonperiodic +export initial_condition_eoc_test_coupled_euler_gravity, + source_terms_eoc_test_coupled_euler_gravity, source_terms_eoc_test_euler export cons2cons, cons2prim, prim2cons, cons2macroscopic, cons2state, cons2mean, cons2entropy, entropy2cons -export density, pressure, density_pressure, velocity, global_mean_vars, equilibrium_distribution, waterheight_pressure -export entropy, energy_total, energy_kinetic, energy_internal, energy_magnetic, cross_helicity, +export density, pressure, density_pressure, velocity, global_mean_vars, + equilibrium_distribution, waterheight_pressure +export entropy, energy_total, energy_kinetic, energy_internal, energy_magnetic, + cross_helicity, enstrophy export lake_at_rest_error export ncomponents, eachcomponent @@ -229,7 +240,8 @@ export load_mesh, load_time export ControllerThreeLevel, ControllerThreeLevelCombined, IndicatorLöhner, IndicatorLoehner, IndicatorMax, - IndicatorNeuralNetwork, NeuralNetworkPerssonPeraire, NeuralNetworkRayHesthaven, NeuralNetworkCNN + IndicatorNeuralNetwork, NeuralNetworkPerssonPeraire, NeuralNetworkRayHesthaven, + NeuralNetworkCNN export PositivityPreservingLimiterZhangShu @@ -245,54 +257,57 @@ export DGMulti, DGMultiBasis, estimate_dt, DGMultiMesh, GaussSBP export 
ViscousFormulationBassiRebay1, ViscousFormulationLocalDG # Visualization-related exports -export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!, adapt_to_mesh_level +export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!, + adapt_to_mesh_level, + iplot, iplot! function __init__() - init_mpi() - - init_p4est() - - # Enable features that depend on the availability of the Plots package - @require Plots="91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin - using .Plots: Plots - end - - @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin - include("visualization/recipes_makie.jl") - using .Makie: Makie, GeometryBasics - export iplot, iplot! # interactive plot - end - - @require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" begin - using .Flux: params - end - - # FIXME upstream. This is a hacky workaround for - # https://github.com/trixi-framework/Trixi.jl/issues/628 - # https://github.com/trixi-framework/Trixi.jl/issues/1185 - # The related upstream issues appear to be - # https://github.com/JuliaLang/julia/issues/35800 - # https://github.com/JuliaLang/julia/issues/32552 - # https://github.com/JuliaLang/julia/issues/41740 - # See also https://discourse.julialang.org/t/performance-depends-dramatically-on-compilation-order/58425 - if VERSION < v"1.9.0" - let - for T in (Float32, Float64) - u_mortars_2d = zeros(T, 2, 2, 2, 2, 2) - u_view_2d = view(u_mortars_2d, 1, :, 1, :, 1) - LoopVectorization.axes(u_view_2d) - - u_mortars_3d = zeros(T, 2, 2, 2, 2, 2, 2) - u_view_3d = view(u_mortars_3d, 1, :, 1, :, :, 1) - LoopVectorization.axes(u_view_3d) - end + init_mpi() + + init_p4est() + + register_error_hints() + + # Enable features that depend on the availability of the Plots package + @require Plots="91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin + using .Plots: Plots + end + + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + @static if !isdefined(Base, :get_extension) + @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin + include("../ext/TrixiMakieExt.jl") + end + end + + @require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" begin + using .Flux: params end - end -end + # FIXME upstream. This is a hacky workaround for + # https://github.com/trixi-framework/Trixi.jl/issues/628 + # https://github.com/trixi-framework/Trixi.jl/issues/1185 + # The related upstream issues appear to be + # https://github.com/JuliaLang/julia/issues/35800 + # https://github.com/JuliaLang/julia/issues/32552 + # https://github.com/JuliaLang/julia/issues/41740 + # See also https://discourse.julialang.org/t/performance-depends-dramatically-on-compilation-order/58425 + if VERSION < v"1.9.0" + let + for T in (Float32, Float64) + u_mortars_2d = zeros(T, 2, 2, 2, 2, 2) + u_view_2d = view(u_mortars_2d, 1, :, 1, :, 1) + LoopVectorization.axes(u_view_2d) + + u_mortars_3d = zeros(T, 2, 2, 2, 2, 2, 2) + u_view_3d = view(u_mortars_3d, 1, :, 1, :, :, 1) + LoopVectorization.axes(u_view_3d) + end + end + end +end include("auxiliary/precompile.jl") _precompile_manual_() - end diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 4958e0d1fdc..115d055c0ca 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -17,13 +17,12 @@ const main_timer = TimerOutput() # Always call timer() to hide implementation details timer() = main_timer - # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). 
# Since these FMAs can increase the performance of many numerical algorithms, # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ PerformanceCounter() @@ -34,27 +33,26 @@ runtime of all measurements added so far via `take!(counter)`, resetting the `counter`. """ mutable struct PerformanceCounter - ncalls_since_readout::Int - runtime::Float64 + ncalls_since_readout::Int + runtime::Float64 end PerformanceCounter() = PerformanceCounter(0, 0.0) @inline function Base.take!(counter::PerformanceCounter) - time_per_call = counter.runtime / counter.ncalls_since_readout - counter.ncalls_since_readout = 0 - counter.runtime = 0.0 - return time_per_call + time_per_call = counter.runtime / counter.ncalls_since_readout + counter.ncalls_since_readout = 0 + counter.runtime = 0.0 + return time_per_call end @inline function Base.put!(counter::PerformanceCounter, runtime::Real) - counter.ncalls_since_readout += 1 - counter.runtime += runtime + counter.ncalls_since_readout += 1 + counter.runtime += runtime end @inline ncalls(counter::PerformanceCounter) = counter.ncalls_since_readout - """ PerformanceCounterList{N}() @@ -65,41 +63,38 @@ the averaged runtime of all measurements added so far via `take!(counter)`, resetting the `counter`. """ struct PerformanceCounterList{N} - counters::NTuple{N, PerformanceCounter} - check_ncalls_consistency::Bool + counters::NTuple{N, PerformanceCounter} + check_ncalls_consistency::Bool end function PerformanceCounterList{N}(check_ncalls_consistency) where {N} - counters = ntuple(_ -> PerformanceCounter(), Val{N}()) - return PerformanceCounterList{N}(counters, check_ncalls_consistency) + counters = ntuple(_ -> PerformanceCounter(), Val{N}()) + return PerformanceCounterList{N}(counters, check_ncalls_consistency) end PerformanceCounterList{N}() where {N} = PerformanceCounterList{N}(true) @inline function Base.take!(counter_list::PerformanceCounterList) - time_per_call = 0.0 - for c in counter_list.counters - time_per_call += take!(c) - end - return time_per_call + time_per_call = 0.0 + for c in counter_list.counters + time_per_call += take!(c) + end + return time_per_call end @inline function ncalls(counter_list::PerformanceCounterList) - ncalls_first = ncalls(first(counter_list.counters)) - - if counter_list.check_ncalls_consistency - for c in counter_list.counters - if ncalls_first != ncalls(c) - error("Some counters have a different number of calls. Using `ncalls` on the counter list is undefined behavior.") - end + ncalls_first = ncalls(first(counter_list.counters)) + + if counter_list.check_ncalls_consistency + for c in counter_list.counters + if ncalls_first != ncalls(c) + error("Some counters have a different number of calls. Using `ncalls` on the counter list is undefined behavior.") + end + end end - end - return ncalls_first + return ncalls_first end - - - """ examples_dir() @@ -114,7 +109,6 @@ readdir(examples_dir()) """ examples_dir() = pkgdir(Trixi, "examples") - """ get_examples() @@ -122,27 +116,27 @@ Return a list of all example elixirs that are provided by Trixi.jl. See also [`examples_dir`](@ref) and [`default_example`](@ref). 
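As an illustrative sketch, one can count the bundled elixirs via `length(get_examples())` (the exact number depends on the Trixi.jl version).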
""" function get_examples() - examples = String[] - for (root, dirs, files) in walkdir(examples_dir()) - for f in files - if startswith(f, "elixir_") && endswith(f, ".jl") - push!(examples, joinpath(root, f)) - end + examples = String[] + for (root, dirs, files) in walkdir(examples_dir()) + for f in files + if startswith(f, "elixir_") && endswith(f, ".jl") + push!(examples, joinpath(root, f)) + end + end end - end - return examples + return examples end - """ default_example() Return the path to an example elixir that can be used to quickly see Trixi.jl in action on a [`TreeMesh`]@(ref). See also [`examples_dir`](@ref) and [`get_examples`](@ref). """ -default_example() = joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl") - +function default_example() + joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl") +end """ default_example_unstructured() @@ -151,8 +145,9 @@ Return the path to an example elixir that can be used to quickly see Trixi.jl in [`UnstructuredMesh2D`]@(ref). This simulation is run on the example curved, unstructured mesh given in the Trixi.jl documentation regarding unstructured meshes. """ -default_example_unstructured() = joinpath(examples_dir(), "unstructured_2d_dgsem", "elixir_euler_basic.jl") - +function default_example_unstructured() + joinpath(examples_dir(), "unstructured_2d_dgsem", "elixir_euler_basic.jl") +end """ ode_default_options() @@ -163,28 +158,28 @@ whenever MPI is used. For example, use `solve(ode, alg; ode_default_options()...)` """ function ode_default_options() - if mpi_isparallel() - return (; save_everystep = false, internalnorm = ode_norm, unstable_check = ode_unstable_check) - else - return (; save_everystep = false) - end + if mpi_isparallel() + return (; save_everystep = false, internalnorm = ode_norm, + unstable_check = ode_unstable_check) + else + return (; save_everystep = false) + end end # Print informative message at startup function print_startup_message() - s = """ - - ████████╗██████╗ ██╗██╗ ██╗██╗ - ╚══██╔══╝██╔══██╗██║╚██╗██╔╝██║ - ██║ ██████╔╝██║ ╚███╔╝ ██║ - ██║ ██╔══██╗██║ ██╔██╗ ██║ - ██║ ██║ ██║██║██╔╝ ██╗██║ - ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚═╝ - """ - mpi_println(s) + s = """ + + ████████╗██████╗ ██╗██╗ ██╗██╗ + ╚══██╔══╝██╔══██╗██║╚██╗██╔╝██║ + ██║ ██████╔╝██║ ╚███╔╝ ██║ + ██║ ██╔══██╗██║ ██╔██╗ ██║ + ██║ ██║ ██║██║██╔╝ ██╗██║ + ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚═╝ + """ + mpi_println(s) end - """ get_name(x) @@ -202,9 +197,7 @@ julia> Trixi.get_name(Val(:test)) ``` """ get_name(x) = string(x) -get_name(::Val{x}) where x = string(x) - - +get_name(::Val{x}) where {x} = string(x) """ @threaded for ... end @@ -224,35 +217,36 @@ Some discussion can be found at https://discourse.julialang.org/t/overhead-of-th and https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435. """ macro threaded(expr) - # Use `esc(quote ... end)` for nested macro calls as suggested in - # https://github.com/JuliaLang/julia/issues/23221 - # - # The following code is a simple version using only `Threads.@threads` from the - # standard library with an additional check whether only a single thread is used - # to reduce some overhead (and allocations) for serial execution. - # - # return esc(quote - # let - # if Threads.nthreads() == 1 - # $(expr) - # else - # Threads.@threads $(expr) - # end - # end - # end) - # - # However, the code below using `@batch` from Polyester.jl is more efficient, - # since this packages provides threads with less overhead. 
Since it is written - # by Chris Elrod, the author of LoopVectorization.jl, we expect this package - # to provide the most efficient and useful implementation of threads (as we use - # them) available in Julia. - # !!! danger "Heisenbug" - # Look at the comments for `wrap_array` when considering to change this macro. - - return esc(quote Trixi.@batch $(expr) end) + # Use `esc(quote ... end)` for nested macro calls as suggested in + # https://github.com/JuliaLang/julia/issues/23221 + # + # The following code is a simple version using only `Threads.@threads` from the + # standard library with an additional check whether only a single thread is used + # to reduce some overhead (and allocations) for serial execution. + # + # return esc(quote + # let + # if Threads.nthreads() == 1 + # $(expr) + # else + # Threads.@threads $(expr) + # end + # end + # end) + # + # However, the code below using `@batch` from Polyester.jl is more efficient, + # since this package provides threads with less overhead. Since it is written + # by Chris Elrod, the author of LoopVectorization.jl, we expect this package + # to provide the most efficient and useful implementation of threads (as we use + # them) available in Julia. + # !!! danger "Heisenbug" + # Look at the comments for `wrap_array` when considering to change this macro. + + return esc(quote + Trixi.@batch $(expr) + end) end - # @trixi_timeit timer() "some label" expression # # Basically the same as a special case of `@timeit_debug` from @@ -261,26 +255,25 @@ end # but it also avoids some related performance problems. Since we do not use # exception handling in Trixi.jl, that's not really an issue. macro trixi_timeit(timer_output, label, expr) - timeit_block = quote - if timeit_debug_enabled() - local to = $(esc(timer_output)) - local enabled = to.enabled - if enabled - local accumulated_data = $(TimerOutputs.push!)(to, $(esc(label))) - end - local b₀ = $(TimerOutputs.gc_bytes)() - local t₀ = $(TimerOutputs.time_ns)() + timeit_block = quote + if timeit_debug_enabled() + local to = $(esc(timer_output)) + local enabled = to.enabled + if enabled + local accumulated_data = $(TimerOutputs.push!)(to, $(esc(label))) + end + local b₀ = $(TimerOutputs.gc_bytes)() + local t₀ = $(TimerOutputs.time_ns)() + end + local val = $(esc(expr)) + if timeit_debug_enabled() && enabled + $(TimerOutputs.do_accumulate!)(accumulated_data, t₀, b₀) + $(TimerOutputs.pop!)(to) + end + val end - local val = $(esc(expr)) - if timeit_debug_enabled() && enabled - $(TimerOutputs.do_accumulate!)(accumulated_data, t₀, b₀) - $(TimerOutputs.pop!)(to) - end - val - end end - """ @autoinfiltrate @autoinfiltrate condition::Bool @@ -304,32 +297,52 @@ See also: [Infiltrator.jl](https://github.com/JuliaDebug/Infiltrator.jl) a breaking change. """ macro autoinfiltrate(condition = true) - pkgid = Base.PkgId(Base.UUID("5903a43b-9cc3-4c30-8d17-598619ec4e9b"), "Infiltrator") - if !haskey(Base.loaded_modules, pkgid) - try - Base.eval(Main, :(using Infiltrator)) - catch err - @error "Cannot load Infiltrator.jl. Make sure it is included in your environment stack." + pkgid = Base.PkgId(Base.UUID("5903a43b-9cc3-4c30-8d17-598619ec4e9b"), "Infiltrator") + if !haskey(Base.loaded_modules, pkgid) + try + Base.eval(Main, :(using Infiltrator)) + catch err + @error "Cannot load Infiltrator.jl. Make sure it is included in your environment stack."
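+            # If loading fails, we simply fall through: `i` below remains `nothing` and the + # macro then expands to a `@warn` instead of an `@infiltrate`.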
+ end end - i = get(Base.loaded_modules, pkgid, nothing) - lnn = LineNumberNode(__source__.line, __source__.file) - - if i === nothing - return Expr( - :macrocall, - Symbol("@warn"), - lnn, - "Could not load Infiltrator.") - end - - return Expr( - :macrocall, - Expr(:., i, QuoteNode(Symbol("@infiltrate"))), - lnn, - esc(condition) - ) + i = get(Base.loaded_modules, pkgid, nothing) + lnn = LineNumberNode(__source__.line, __source__.file) + + if i === nothing + return Expr(:macrocall, + Symbol("@warn"), + lnn, + "Could not load Infiltrator.") + end + + return Expr(:macrocall, + Expr(:., i, QuoteNode(Symbol("@infiltrate"))), + lnn, + esc(condition)) end +# Use the *experimental* feature in `Base` to add error hints for specific errors. We use it to +# warn users in case they try to execute functions that are extended in package extensions which +# have not yet been loaded. +# +# Reference: https://docs.julialang.org/en/v1/base/base/#Base.Experimental.register_error_hint +function register_error_hints() + # We follow the advice in the docs and gracefully exit without doing anything if the experimental + # feature gets silently removed. + if !isdefined(Base.Experimental, :register_error_hint) + return nothing + end + Base.Experimental.register_error_hint(MethodError) do io, exc, argtypes, kwargs + if exc.f in [iplot, iplot!] && isempty(methods(exc.f)) + print(io, + "\n$(exc.f) has no methods yet. It is part of a plotting extension of Trixi.jl " * + "that relies on Makie being loaded.\n" * + "To activate the extension, execute `using Makie`, `using CairoMakie`, " * + "`using GLMakie`, or load any other package that also uses Makie.") + end + end + + return nothing +end end # @muladd diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 711471bdafc..90650f6abcf 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Abstract base type - all containers that want to use these features must inherit from it abstract type AbstractContainer end @@ -15,33 +15,30 @@ function move_connectivity! end function delete_connectivity! end function reset_data_structures!
end - # Auxiliary copy function to copy data between containers function copy_data!(target::AbstractArray, source::AbstractArray, - first::Int, last::Int, destination::Int, block_size::Int=1) - count = last - first + 1 - if destination <= first || destination > last - # In this case it is safe to copy forward (left-to-right) without overwriting data - for i in 0:(count-1), j in 1:block_size - target[block_size*(destination+i-1) + j] = source[block_size*(first+i-1) + j] + first::Int, last::Int, destination::Int, block_size::Int = 1) + count = last - first + 1 + if destination <= first || destination > last + # In this case it is safe to copy forward (left-to-right) without overwriting data + for i in 0:(count - 1), j in 1:block_size + target[block_size * (destination + i - 1) + j] = source[block_size * (first + i - 1) + j] + end + else + # In this case we need to copy backward (right-to-left) to prevent overwriting data + for i in (count - 1):-1:0, j in 1:block_size + target[block_size * (destination + i - 1) + j] = source[block_size * (first + i - 1) + j] + end end - else - # In this case we need to copy backward (right-to-left) to prevent overwriting data - for i in (count-1):-1:0, j in 1:block_size - target[block_size*(destination+i-1) + j] = source[block_size*(first+i-1) + j] - end - end - return target + return target end - # Inquire about capacity and size capacity(c::AbstractContainer) = c.capacity Base.length(c::AbstractContainer) = c.length Base.size(c::AbstractContainer) = (length(c),) - """ resize!(c::AbstractContainer, new_length) -> AbstractContainer @@ -50,26 +47,25 @@ length, the first `new_length` elements will be retained. If `new_length` is larger, the new elements are invalidated. """ function Base.resize!(c::AbstractContainer, new_length) - @assert new_length >= zero(new_length) "New length must be >= 0" - @assert new_length <= capacity(c) "New length would exceed capacity" - - # If new length is greater than current length, append to container. - # If new length is less than current length, shrink container. - # If new length is equal to current length, do nothing. - if new_length > length(c) - # First, invalidate range (to be sure that no sensible values are accidentally left there) - invalidate!(c, length(c) + 1, new_length) - - # Then, set new container length - c.length = new_length - elseif new_length < length(c) - # Rely on remove&shift to do The Right Thing (`remove_shift!` also updates the length) - remove_shift!(c, new_length + 1, length(c)) - end - - return c -end + @assert new_length>=zero(new_length) "New length must be >= 0" + @assert new_length<=capacity(c) "New length would exceed capacity" + + # If new length is greater than current length, append to container. + # If new length is less than current length, shrink container. + # If new length is equal to current length, do nothing. + if new_length > length(c) + # First, invalidate range (to be sure that no sensible values are accidentally left there) + invalidate!(c, length(c) + 1, new_length) + + # Then, set new container length + c.length = new_length + elseif new_length < length(c) + # Rely on remove&shift to do The Right Thing (`remove_shift!` also updates the length) + remove_shift!(c, new_length + 1, length(c)) + end + return c +end # Copy data range from source to target container. # @@ -77,255 +73,245 @@ end # inheriting from AbstractContainer. # TODO: Shall we extend Base.copyto! ? 
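# As an illustrative trace of the overlap handling in `copy_data!` above: copying # `first = 2, last = 4` to `destination = 3` within one array satisfies neither # `destination <= first` nor `destination > last`, so the backward branch copies # indices 4, 3, 2 in that order and no source element is overwritten before it is read.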
function Trixi.copy!(target::AbstractContainer, source::AbstractContainer, - first::Int, last::Int, destination::Int) - @assert 1 <= first <= length(source) "First cell out of range" - @assert 1 <= last <= length(source) "Last cell out of range" - @assert 1 <= destination <= length(target) "Destination out of range" - @assert destination + (last - first) <= length(target) "Target range out of bounds" - - # Return if copy would be a no-op - if last < first || (source === target && first == destination) - return target - end + first::Int, last::Int, destination::Int) + @assert 1<=first<=length(source) "First cell out of range" + @assert 1<=last<=length(source) "Last cell out of range" + @assert 1<=destination<=length(target) "Destination out of range" + @assert destination + (last - first)<=length(target) "Target range out of bounds" + + # Return if copy would be a no-op + if last < first || (source === target && first == destination) + return target + end - raw_copy!(target, source, first, last, destination) + raw_copy!(target, source, first, last, destination) - return target + return target end - # Convenience method to copy a single element -function Trixi.copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) - Trixi.copy!(target, source, from, from, destination) +function Trixi.copy!(target::AbstractContainer, source::AbstractContainer, from::Int, + destination::Int) + Trixi.copy!(target, source, from, from, destination) end - # Convenience method for copies within a single container function Trixi.copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) - Trixi.copy!(c, c, first, last, destination) + Trixi.copy!(c, c, first, last, destination) end - # Convenience method for copying a single element within a single container function Trixi.copy!(c::AbstractContainer, from::Int, destination::Int) - Trixi.copy!(c, c, from, from, destination) + Trixi.copy!(c, c, from, from, destination) end - # Move elements in a way that preserves connectivity. function move!(c::AbstractContainer, first::Int, last::Int, destination::Int) - @assert 1 <= first <= length(c) "First cell $first out of range" - @assert 1 <= last <= length(c) "Last cell $last out of range" - @assert 1 <= destination <= length(c) "Destination $destination out of range" - @assert destination + (last - first) <= length(c) "Target range out of bounds" - - # Return if move would be a no-op - if last < first || first == destination - return c - end - - # Copy cells to new location - raw_copy!(c, first, last, destination) + @assert 1<=first<=length(c) "First cell $first out of range" + @assert 1<=last<=length(c) "Last cell $last out of range" + @assert 1<=destination<=length(c) "Destination $destination out of range" + @assert destination + (last - first)<=length(c) "Target range out of bounds" + + # Return if move would be a no-op + if last < first || first == destination + return c + end - # Move connectivity - move_connectivity!(c, first, last, destination) + # Copy cells to new location + raw_copy!(c, first, last, destination) + # Move connectivity + move_connectivity!(c, first, last, destination) - # Invalidate original cell locations (unless they already contain new data due to overlap) - # 1) If end of destination range is within original range, shift first_invalid to the right - count = last - first + 1 - first_invalid = (first <= destination + count - 1 <= last) ? 
destination + count : first - # 2) If beginning of destination range is within original range, shift last_invalid to the left - last_invalid = (first <= destination <= last) ? destination - 1 : last - # 3) Invalidate range - invalidate!(c, first_invalid, last_invalid) + # Invalidate original cell locations (unless they already contain new data due to overlap) + # 1) If end of destination range is within original range, shift first_invalid to the right + count = last - first + 1 + first_invalid = (first <= destination + count - 1 <= last) ? destination + count : + first + # 2) If beginning of destination range is within original range, shift last_invalid to the left + last_invalid = (first <= destination <= last) ? destination - 1 : last + # 3) Invalidate range + invalidate!(c, first_invalid, last_invalid) - return c + return c +end +function move!(c::AbstractContainer, from::Int, destination::Int) + move!(c, from, from, destination) end -move!(c::AbstractContainer, from::Int, destination::Int) = move!(c, from, from, destination) # Default implementation for moving a single element function move_connectivity!(c::AbstractContainer, from::Int, destination::Int) - return move_connectivity!(c, from, from, destination) + return move_connectivity!(c, from, from, destination) end # Default implementation for invalidating a single element function invalidate!(c::AbstractContainer, id::Int) - return invalidate!(c, id, id) + return invalidate!(c, id, id) end - # Swap two elements in a container while preserving element connectivity. function swap!(c::AbstractContainer, a::Int, b::Int) - @assert 1 <= a <= length(c) "a out of range" - @assert 1 <= b <= length(c) "b out of range" + @assert 1<=a<=length(c) "a out of range" + @assert 1<=b<=length(c) "b out of range" - # Return if swap would be a no-op - if a == b - return c - end + # Return if swap would be a no-op + if a == b + return c + end - # Move a to dummy location - raw_copy!(c, a, c.dummy) - move_connectivity!(c, a, c.dummy) + # Move a to dummy location + raw_copy!(c, a, c.dummy) + move_connectivity!(c, a, c.dummy) - # Move b to a - raw_copy!(c, b, a) - move_connectivity!(c, b, a) + # Move b to a + raw_copy!(c, b, a) + move_connectivity!(c, b, a) - # Move from dummy location to b - raw_copy!(c, c.dummy, b) - move_connectivity!(c, c.dummy, b) + # Move from dummy location to b + raw_copy!(c, c.dummy, b) + move_connectivity!(c, c.dummy, b) - # Invalidate dummy to be sure - invalidate!(c, c.dummy) + # Invalidate dummy to be sure + invalidate!(c, c.dummy) - return c + return c end - # Insert blank elements in container, shifting the following elements back. # # After a call to insert!, the range `position:position + count - 1` will be available for use. # TODO: Shall we extend Base.insert! ? 
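# For example, with `length(c) == 5`, `insert!(c, 3, 2)` grows the container to length 7, # moves the old elements 3:5 to positions 5:7, and leaves positions 3:4 invalidated and # ready for new data.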
function insert!(c::AbstractContainer, position::Int, count::Int) - @assert 1 <= position <= length(c) + 1 "Insert position out of range" - @assert count >= 0 "Count must be non-negative" - @assert count + length(c) <= capacity(c) "New length would exceed capacity" + @assert 1<=position<=length(c)+1 "Insert position out of range" + @assert count>=0 "Count must be non-negative" + @assert count + length(c)<=capacity(c) "New length would exceed capacity" - # Return if insertation would be a no-op - if count == 0 - return c - end + # Return if insertion would be a no-op + if count == 0 + return c + end - # Append and return if insertion is beyond last current element - if position == length(c) + 1 - resize!(c, length(c) + count) - return c - end + # Append and return if insertion is beyond last current element + if position == length(c) + 1 + resize!(c, length(c) + count) + return c + end - # Increase length - c.length += count + # Increase length + c.length += count - # Move original cells that currently occupy the insertion region, unless - # insert position is one beyond previous length - if position <= length(c) - count - move!(c, position, length(c) - count, position + count) - end + # Move original cells that currently occupy the insertion region, unless + # insert position is one beyond previous length + if position <= length(c) - count + move!(c, position, length(c) - count, position + count) + end - return c + return c end - # Erase elements from container, deleting their connectivity and then invalidating their data. # TODO: Shall we extend Base.deleteat! or Base.delete! ? function erase!(c::AbstractContainer, first::Int, last::Int) - @assert 1 <= first <= length(c) "First cell out of range" - @assert 1 <= last <= length(c) "Last cell out of range" + @assert 1<=first<=length(c) "First cell out of range" + @assert 1<=last<=length(c) "Last cell out of range" - # Return if eraseure would be a no-op - if last < first - return c - end + # Return if erasure would be a no-op + if last < first + return c + end - # Delete connectivity and invalidate cells - delete_connectivity!(c, first, last) - invalidate!(c, first, last) + # Delete connectivity and invalidate cells + delete_connectivity!(c, first, last) + invalidate!(c, first, last) - return c + return c end erase!(c::AbstractContainer, id::Int) = erase!(c, id, id) - # Remove cells and shift existing cells forward to close the gap function remove_shift!(c::AbstractContainer, first::Int, last::Int) - @assert 1 <= first <= length(c) "First cell out of range" - @assert 1 <= last <= length(c) "Last cell out of range" + @assert 1<=first<=length(c) "First cell out of range" + @assert 1<=last<=length(c) "Last cell out of range" - # Return if removal would be a no-op - if last < first - return c - end + # Return if removal would be a no-op + if last < first + return c + end - # Delete connectivity of cells to be removed - delete_connectivity!(c, first, last) + # Delete connectivity of cells to be removed + delete_connectivity!(c, first, last) - if last == length(c) - # If everything up to the last cell is removed, no shifting is required - invalidate!(c, first, last) - else - # Otherwise, the corresponding cells are moved forward - move!(c, last + 1, length(c), first) - end + if last == length(c) + # If everything up to the last cell is removed, no shifting is required + invalidate!(c, first, last) + else + # Otherwise, the corresponding cells are moved forward + move!(c, last + 1, length(c), first) + end - # Reduce length - count = last - first
+ 1 - c.length -= count + # Reduce length + count = last - first + 1 + c.length -= count - return c + return c end remove_shift!(c::AbstractContainer, id::Int) = remove_shift!(c, id, id) - # Remove cells and fill gap with cells from the end of the container (to reduce copy operations) function remove_fill!(c::AbstractContainer, first::Int, last::Int) - @assert 1 <= first <= length(c) "First cell out of range" - @assert 1 <= last <= length(c) "Last cell out of range" + @assert 1<=first<=length(c) "First cell out of range" + @assert 1<=last<=length(c) "Last cell out of range" - # Return if removal would be a no-op - if last < first - return c - end + # Return if removal would be a no-op + if last < first + return c + end - # Delete connectivity of cells to be removed and then invalidate them - delete_connectivity!(c, first, last) - invalidate!(c, first, last) + # Delete connectivity of cells to be removed and then invalidate them + delete_connectivity!(c, first, last) + invalidate!(c, first, last) - # Copy cells from end (unless last is already the last cell) - count = last - first + 1 - if last < length(c) - move!(c, max(length(c) - count + 1, last + 1), length(c), first) - end + # Copy cells from end (unless last is already the last cell) + count = last - first + 1 + if last < length(c) + move!(c, max(length(c) - count + 1, last + 1), length(c), first) + end - # Reduce length - c.length -= count + # Reduce length + c.length -= count - return c + return c end - # Reset container to zero-length and with a new capacity function reset!(c::AbstractContainer, capacity::Int) - @assert capacity >=0 + @assert capacity >= 0 - c.capacity = capacity - c.length = 0 - c.dummy = capacity + 1 - reset_data_structures!(c) + c.capacity = capacity + c.length = 0 + c.dummy = capacity + 1 + reset_data_structures!(c) - return c + return c end - # Invalidate all elements and set length to zero. function clear!(c::AbstractContainer) - invalidate!(c) - c.length = 0 + invalidate!(c) + c.length = 0 - return c + return c end - # Helpful overloads for `raw_copy` function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) - raw_copy!(c, c, first, last, destination) + raw_copy!(c, c, first, last, destination) end -function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) - raw_copy!(target, source, from, from, destination) +function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, + destination::Int) + raw_copy!(target, source, from, from, destination) end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) - raw_copy!(c, c, from, from, destination) + raw_copy!(c, c, from, from, destination) end - - end # @muladd diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 78340c86cc3..27c1bed5ca4 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ ln_mean(x, y) @@ -54,13 +54,13 @@ Given ε = 1.0e-4, we use the following algorithm. 
https://www.agner.org/optimize/instruction_tables.pdf """ @inline function ln_mean(x, y) - epsilon_f2 = 1.0e-4 - f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 - if f2 < epsilon_f2 - return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) - else - return (y - x) / log(y / x) - end + epsilon_f2 = 1.0e-4 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) + else + return (y - x) / log(y / x) + end end """ @@ -74,17 +74,15 @@ logarithmic mean is needed, by replacing a (slow) division by a (fast) multiplication. """ @inline function inv_ln_mean(x, y) - epsilon_f2 = 1.0e-4 - f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 - if f2 < epsilon_f2 - return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) - else - return log(y / x) / (y - x) - end + epsilon_f2 = 1.0e-4 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) + else + return log(y / x) / (y - x) + end end - - # `Base.max` and `Base.min` perform additional checks for signed zeros and `NaN`s # which are not present in comparable functions in Fortran/C++. For example, # ```julia @@ -190,8 +188,6 @@ julia> min(2, 5, 1) """ @inline min(args...) = @fastmath min(args...) - - """ positive_part(x) @@ -199,7 +195,7 @@ Return `x` if `x` is positive, else zero. In other words, return `(x + abs(x)) / 2` for real numbers `x`. """ @inline function positive_part(x) - return max(x, zero(x)) + return max(x, zero(x)) end """ @@ -209,8 +205,6 @@ Return `x` if `x` is negative, else zero. In other words, return `(x - abs(x)) / 2` for real numbers `x`. """ @inline function negative_part(x) - return min(x, zero(x)) + return min(x, zero(x)) end - - end # @muladd diff --git a/src/auxiliary/mpi.jl b/src/auxiliary/mpi.jl index ab1b13d49da..2c485b4832c 100644 --- a/src/auxiliary/mpi.jl +++ b/src/auxiliary/mpi.jl @@ -6,29 +6,28 @@ Initialize MPI by calling `MPI.Initialized()`. The function will check if MPI is and if yes, do nothing, thus it is safe to call it multiple times. """ function init_mpi() - if MPI_INITIALIZED[] + if MPI_INITIALIZED[] + return nothing + end + + # MPI.jl handles multiple calls to MPI.Init appropriately. Thus, we don't need + # any common checks of the form `if MPI.Initialized() ...`. + # threadlevel=MPI.THREAD_FUNNELED: Only main thread makes MPI calls + # finalize_atexit=true : MPI.jl will call MPI.Finalize as `atexit` hook + provided = MPI.Init(threadlevel = MPI.THREAD_FUNNELED, finalize_atexit = true) + @assert provided>=MPI.THREAD_FUNNELED "MPI library with insufficient threading support" + + # Initialize global MPI state + MPI_RANK[] = MPI.Comm_rank(MPI.COMM_WORLD) + MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) + MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 + MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] + MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 + MPI_INITIALIZED[] = true + + return nothing - end - - # MPI.jl handles multiple calls to MPI.Init appropriately. Thus, we don't need - # any common checks of the form `if MPI.Initialized() ...`.
- # threadlevel=MPI.THREAD_FUNNELED: Only main thread makes MPI calls - # finalize_atexit=true : MPI.jl will call call MPI.Finalize as `atexit` hook - provided = MPI.Init(threadlevel=MPI.THREAD_FUNNELED, finalize_atexit=true) - @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" - - # Initialize global MPI state - MPI_RANK[] = MPI.Comm_rank(MPI.COMM_WORLD) - MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) - MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 - MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] - MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 - MPI_INITIALIZED[] = true - - return nothing end - const MPI_INITIALIZED = Ref(false) const MPI_RANK = Ref(-1) const MPI_SIZE = Ref(-1) @@ -36,7 +35,6 @@ const MPI_IS_PARALLEL = Ref(false) const MPI_IS_SERIAL = Ref(true) const MPI_IS_ROOT = Ref(true) - @inline mpi_comm() = MPI.COMM_WORLD @inline mpi_rank() = MPI_RANK[] @@ -50,19 +48,18 @@ const MPI_IS_ROOT = Ref(true) @inline mpi_root() = 0 @inline function mpi_println(args...) - if mpi_isroot() - println(args...) - end - return nothing + if mpi_isroot() + println(args...) + end + return nothing end @inline function mpi_print(args...) - if mpi_isroot() - print(args...) - end - return nothing + if mpi_isroot() + print(args...) + end + return nothing end - """ ode_norm(u, t) @@ -79,14 +76,15 @@ See the "Advanced Adaptive Stepsize Control" section of the [documentation](http """ ode_norm(u::Number, t) = @fastmath abs(u) function ode_norm(u::AbstractArray, t) - local_sumabs2 = recursive_sum_abs2(u) # sum(abs2, u) - local_length = recursive_length(u) # length(u) - if mpi_isparallel() - global_sumabs2, global_length = MPI.Allreduce([local_sumabs2, local_length], +, mpi_comm()) - return sqrt(global_sumabs2 / global_length) - else - return sqrt(local_sumabs2 / local_length) - end + local_sumabs2 = recursive_sum_abs2(u) # sum(abs2, u) + local_length = recursive_length(u) # length(u) + if mpi_isparallel() + global_sumabs2, global_length = MPI.Allreduce([local_sumabs2, local_length], +, + mpi_comm()) + return sqrt(global_sumabs2 / global_length) + else + return sqrt(local_sumabs2 / local_length) + end end # Recursive `sum(abs2, ...)` and `length(...)` are required when dealing with @@ -102,16 +100,18 @@ recursive_sum_abs2(u::Number) = abs2(u) # https://github.com/SciML/RecursiveArrayTools.jl # However, what you have is good enough for us for now, so we don't need this # additional dependency at the moment. -recursive_sum_abs2(u::AbstractArray) = mapreduce(recursive_sum_abs2, +, u; init=zero(eltype(eltype(u)))) +function recursive_sum_abs2(u::AbstractArray) + mapreduce(recursive_sum_abs2, +, u; init = zero(eltype(eltype(u)))) +end recursive_length(u::Number) = length(u) recursive_length(u::AbstractArray{<:Number}) = length(u) recursive_length(u::AbstractArray{<:AbstractArray}) = sum(recursive_length, u) -function recursive_length(u::AbstractArray{<:StaticArrays.StaticArray{S, <:Number}}) where {S} - prod(StaticArrays.Size(eltype(u))) * length(u) +function recursive_length(u::AbstractArray{<:StaticArrays.StaticArray{S, + <:Number}}) where {S} + prod(StaticArrays.Size(eltype(u))) * length(u) end - """ ode_unstable_check(dt, u, semi, t) diff --git a/src/auxiliary/p4est.jl b/src/auxiliary/p4est.jl index b7851ba6f24..93b5166cd81 100644 --- a/src/auxiliary/p4est.jl +++ b/src/auxiliary/p4est.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent """ init_p4est() @@ -13,117 +13,117 @@ This function will check if `p4est` is already initialized and if yes, do nothing, thus it is safe to call it multiple times. """ function init_p4est() - p4est_package_id = P4est.package_id() - if p4est_package_id >= 0 - return nothing - end + p4est_package_id = P4est.package_id() + if p4est_package_id >= 0 + return nothing + end - # Initialize `p4est` with log level ERROR to prevent a lot of output in AMR simulations - p4est_init(C_NULL, SC_LP_ERROR) + # Initialize `p4est` with log level ERROR to prevent a lot of output in AMR simulations + p4est_init(C_NULL, SC_LP_ERROR) - return nothing + return nothing end - # Convert sc_array of type T to Julia array -function unsafe_wrap_sc(::Type{T}, sc_array::Ptr{sc_array}) where T - sc_array_obj = unsafe_load(sc_array) - return unsafe_wrap_sc(T, sc_array_obj) +function unsafe_wrap_sc(::Type{T}, sc_array::Ptr{sc_array}) where {T} + sc_array_obj = unsafe_load(sc_array) + return unsafe_wrap_sc(T, sc_array_obj) end -function unsafe_wrap_sc(::Type{T}, sc_array_obj::sc_array) where T - elem_count = sc_array_obj.elem_count - array = sc_array_obj.array +function unsafe_wrap_sc(::Type{T}, sc_array_obj::sc_array) where {T} + elem_count = sc_array_obj.elem_count + array = sc_array_obj.array - return unsafe_wrap(Array, Ptr{T}(array), elem_count) + return unsafe_wrap(Array, Ptr{T}(array), elem_count) end - # Load the ith element (1-indexed) of an sc array of type T -function unsafe_load_sc(::Type{T}, sc_array::Ptr{sc_array}, i=1) where T - sc_array_obj = unsafe_load(sc_array) - return unsafe_load_sc(T, sc_array_obj, i) +function unsafe_load_sc(::Type{T}, sc_array::Ptr{sc_array}, i = 1) where {T} + sc_array_obj = unsafe_load(sc_array) + return unsafe_load_sc(T, sc_array_obj, i) end -function unsafe_load_sc(::Type{T}, sc_array_obj::sc_array, i=1) where T - element_size = sc_array_obj.elem_size - @assert element_size == sizeof(T) +function unsafe_load_sc(::Type{T}, sc_array_obj::sc_array, i = 1) where {T} + element_size = sc_array_obj.elem_size + @assert element_size == sizeof(T) - return unsafe_load(Ptr{T}(sc_array_obj.array), i) + return unsafe_load(Ptr{T}(sc_array_obj.array), i) end - # Create new `p4est` from a p4est_connectivity # 2D function new_p4est(connectivity::Ptr{p4est_connectivity_t}, initial_refinement_level) - comm = P4est.uses_mpi() ? mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI - p4est_new_ext(comm, - connectivity, - 0, # No minimum initial qudrants per processor - initial_refinement_level, - true, # Refine uniformly - 2 * sizeof(Int), # Use Int-Vector of size 2 as quadrant user data - C_NULL, # No init function - C_NULL) # No user pointer + comm = P4est.uses_mpi() ? mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI + p4est_new_ext(comm, + connectivity, + 0, # No minimum initial quadrants per processor + initial_refinement_level, + true, # Refine uniformly + 2 * sizeof(Int), # Use Int-Vector of size 2 as quadrant user data + C_NULL, # No init function + C_NULL) # No user pointer end # 3D function new_p4est(connectivity::Ptr{p8est_connectivity_t}, initial_refinement_level) - comm = P4est.uses_mpi() ?
mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI - p8est_new_ext(comm, connectivity, 0, initial_refinement_level, true, 2 * sizeof(Int), C_NULL, C_NULL) + comm = P4est.uses_mpi() ? mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI + p8est_new_ext(comm, connectivity, 0, initial_refinement_level, true, + 2 * sizeof(Int), C_NULL, C_NULL) end - # Save `p4est` data to file # 2D function save_p4est!(file, p4est::Ptr{p4est_t}) - # Don't save user data of the quads - p4est_save(file, p4est, false) + # Don't save user data of the quads + p4est_save(file, p4est, false) end # 3D function save_p4est!(file, p8est::Ptr{p8est_t}) - # Don't save user data of the quads - p8est_save(file, p8est, false) + # Don't save user data of the quads + p8est_save(file, p8est, false) end - # Load `p4est` from file # 2D function load_p4est(file, ::Val{2}) - conn_vec = Vector{Ptr{p4est_connectivity_t}}(undef, 1) - comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI - p4est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) + conn_vec = Vector{Ptr{p4est_connectivity_t}}(undef, 1) + comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI + p4est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) end # 3D function load_p4est(file, ::Val{3}) - conn_vec = Vector{Ptr{p8est_connectivity_t}}(undef, 1) - comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI - p8est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) + conn_vec = Vector{Ptr{p8est_connectivity_t}}(undef, 1) + comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI + p8est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) end - # Read `p4est` connectivity from Abaqus mesh file (.inp) # 2D read_inp_p4est(meshfile, ::Val{2}) = p4est_connectivity_read_inp(meshfile) # 3D read_inp_p4est(meshfile, ::Val{3}) = p8est_connectivity_read_inp(meshfile) - # Refine `p4est` if refine_fn_c returns 1 # 2D -refine_p4est!(p4est::Ptr{p4est_t}, recursive, refine_fn_c, init_fn_c) = p4est_refine(p4est, recursive, refine_fn_c, init_fn_c) +function refine_p4est!(p4est::Ptr{p4est_t}, recursive, refine_fn_c, init_fn_c) + p4est_refine(p4est, recursive, refine_fn_c, init_fn_c) +end # 3D -refine_p4est!(p8est::Ptr{p8est_t}, recursive, refine_fn_c, init_fn_c) = p8est_refine(p8est, recursive, refine_fn_c, init_fn_c) - +function refine_p4est!(p8est::Ptr{p8est_t}, recursive, refine_fn_c, init_fn_c) + p8est_refine(p8est, recursive, refine_fn_c, init_fn_c) +end # Coarsen `p4est` if coarsen_fn_c returns 1 # 2D -coarsen_p4est!(p4est::Ptr{p4est_t}, recursive, coarsen_fn_c, init_fn_c) = p4est_coarsen(p4est, recursive, coarsen_fn_c, init_fn_c) +function coarsen_p4est!(p4est::Ptr{p4est_t}, recursive, coarsen_fn_c, init_fn_c) + p4est_coarsen(p4est, recursive, coarsen_fn_c, init_fn_c) +end # 3D -coarsen_p4est!(p8est::Ptr{p8est_t}, recursive, coarsen_fn_c, init_fn_c) = p8est_coarsen(p8est, recursive, coarsen_fn_c, init_fn_c) - +function coarsen_p4est!(p8est::Ptr{p8est_t}, recursive, coarsen_fn_c, init_fn_c) + p8est_coarsen(p8est, recursive, coarsen_fn_c, init_fn_c) +end # Create new ghost layer from p4est, only connections via faces are relevant # 2D @@ -152,11 +152,11 @@ ghost_new_p4est(p8est::Ptr{p8est_t}) = p8est_ghost_new(p8est, P4est.P8EST_CONNEC # Check if ghost layer is valid # 2D function ghost_is_valid_p4est(p4est::Ptr{p4est_t}, ghost_layer::Ptr{p4est_ghost_t}) - return p4est_ghost_is_valid(p4est, ghost_layer) + return p4est_ghost_is_valid(p4est, ghost_layer) end # 3D function ghost_is_valid_p4est(p4est::Ptr{p8est_t},
ghost_layer::Ptr{p8est_ghost_t}) - return p8est_ghost_is_valid(p4est, ghost_layer) + return p8est_ghost_is_valid(p4est, ghost_layer) end # Destroy ghost layer @@ -165,79 +165,74 @@ ghost_destroy_p4est(ghost_layer::Ptr{p4est_ghost_t}) = p4est_ghost_destroy(ghost # 3D ghost_destroy_p4est(ghost_layer::Ptr{p8est_ghost_t}) = p8est_ghost_destroy(ghost_layer) - # Let `p4est` iterate over each cell volume and cell face. # Call iter_volume_c for each cell and iter_face_c for each face. # 2D -function iterate_p4est(p4est::Ptr{p4est_t}, user_data; ghost_layer=C_NULL, - iter_volume_c=C_NULL, iter_face_c=C_NULL) - if user_data === C_NULL - user_data_ptr = user_data - elseif user_data isa AbstractArray - user_data_ptr = pointer(user_data) - else - user_data_ptr = pointer_from_objref(user_data) - end - - GC.@preserve user_data begin - p4est_iterate(p4est, - ghost_layer, - user_data_ptr, - iter_volume_c, # iter_volume - iter_face_c, # iter_face - C_NULL) # iter_corner - end - - return nothing +function iterate_p4est(p4est::Ptr{p4est_t}, user_data; ghost_layer = C_NULL, + iter_volume_c = C_NULL, iter_face_c = C_NULL) + if user_data === C_NULL + user_data_ptr = user_data + elseif user_data isa AbstractArray + user_data_ptr = pointer(user_data) + else + user_data_ptr = pointer_from_objref(user_data) + end + + GC.@preserve user_data begin + p4est_iterate(p4est, + ghost_layer, + user_data_ptr, + iter_volume_c, # iter_volume + iter_face_c, # iter_face + C_NULL) # iter_corner + end + + return nothing end # 3D -function iterate_p4est(p8est::Ptr{p8est_t}, user_data; ghost_layer=C_NULL, - iter_volume_c=C_NULL, iter_face_c=C_NULL) - if user_data === C_NULL - user_data_ptr = user_data - elseif user_data isa AbstractArray - user_data_ptr = pointer(user_data) - else - user_data_ptr = pointer_from_objref(user_data) - end - - GC.@preserve user_data begin - p8est_iterate(p8est, - ghost_layer, - user_data_ptr, - iter_volume_c, # iter_volume - iter_face_c, # iter_face - C_NULL, # iter_edge - C_NULL) # iter_corner - end - - return nothing -end +function iterate_p4est(p8est::Ptr{p8est_t}, user_data; ghost_layer = C_NULL, + iter_volume_c = C_NULL, iter_face_c = C_NULL) + if user_data === C_NULL + user_data_ptr = user_data + elseif user_data isa AbstractArray + user_data_ptr = pointer(user_data) + else + user_data_ptr = pointer_from_objref(user_data) + end + + GC.@preserve user_data begin + p8est_iterate(p8est, + ghost_layer, + user_data_ptr, + iter_volume_c, # iter_volume + iter_face_c, # iter_face + C_NULL, # iter_edge + C_NULL) # iter_corner + end + return nothing +end # Load i-th element of the sc_array info.sides of the type p[48]est_iter_face_side_t # 2D version -function unsafe_load_side(info::Ptr{p4est_iter_face_info_t}, i=1) - return unsafe_load_sc(p4est_iter_face_side_t, unsafe_load(info).sides, i) +function unsafe_load_side(info::Ptr{p4est_iter_face_info_t}, i = 1) + return unsafe_load_sc(p4est_iter_face_side_t, unsafe_load(info).sides, i) end # 3D version -function unsafe_load_side(info::Ptr{p8est_iter_face_info_t}, i=1) - return unsafe_load_sc(p8est_iter_face_side_t, unsafe_load(info).sides, i) +function unsafe_load_side(info::Ptr{p8est_iter_face_info_t}, i = 1) + return unsafe_load_sc(p8est_iter_face_side_t, unsafe_load(info).sides, i) end - # Load i-th element of the sc_array p4est.trees of the type p[48]est_tree_t # 2D version -function unsafe_load_tree(p4est::Ptr{p4est_t}, i=1) - return unsafe_load_sc(p4est_tree_t, unsafe_load(p4est).trees, i) +function unsafe_load_tree(p4est::Ptr{p4est_t}, i = 1) + return 
unsafe_load_sc(p4est_tree_t, unsafe_load(p4est).trees, i) end # 3D version -function unsafe_load_tree(p8est::Ptr{p8est_t}, i=1) - return unsafe_load_sc(p8est_tree_t, unsafe_load(p8est).trees, i) +function unsafe_load_tree(p8est::Ptr{p8est_t}, i = 1) + return unsafe_load_sc(p8est_tree_t, unsafe_load(p8est).trees, i) end - - end # @muladd diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl index 0695e72efac..7ed0e26b5ef 100644 --- a/src/auxiliary/precompile.jl +++ b/src/auxiliary/precompile.jl @@ -34,12 +34,10 @@ inf_timing = @snoopi tmin=0.01 begin show(stdout, mesh) show(stdout, MIME"text/plain"(), mesh) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) show(stdout, semi) show(stdout, MIME"text/plain"(), semi) - ############################################################################### # ODE solvers, callbacks etc. @@ -92,7 +90,6 @@ inf_timing = @snoopi tmin=0.01 begin analysis_callback, alive_callback, amr_callback, stepsize_callback); - ############################################################################### # run the simulation @@ -125,7 +122,6 @@ The latency can be measured by running julia --threads=1 -e '@time using Trixi; @time include(joinpath(examples_dir(), "2d", "elixir_advection_basic.jl"))' ``` - We add `@assert` to the precompile statements below to make sure that we don't include failing precompile statements, cf. https://timholy.github.io/SnoopCompile.jl/stable/snoopi/. If any assertions below fail, it is generally safe to just disable the failing call @@ -135,360 +131,497 @@ statements in accordance with the changes in Trixi.jl's source code. Please, fee the core developers of Trixi.jl to get help with that. =# - import StaticArrays import SciMLBase - # manually generated precompile statements function _precompile_manual_() - ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - - function equations_types_1d(RealT) - ( LinearScalarAdvectionEquation1D{RealT}, - HyperbolicDiffusionEquation1D{RealT}, - CompressibleEulerEquations1D{RealT}, - IdealGlmMhdEquations1D{RealT}, - ) - end - function equations_types_2d(RealT) - ( LinearScalarAdvectionEquation2D{RealT}, - HyperbolicDiffusionEquations2D{RealT}, - CompressibleEulerEquations2D{RealT}, - IdealGlmMhdEquations2D{RealT}, - LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}, - ) - end - function equations_types_3d(RealT) - ( LinearScalarAdvectionEquation3D{RealT}, - HyperbolicDiffusionEquations3D{RealT}, - CompressibleEulerEquations3D{RealT}, - IdealGlmMhdEquations3D{RealT}, - LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}, - ) - end - function equations_types(RealT) - ( LinearScalarAdvectionEquation1D{RealT}, - LinearScalarAdvectionEquation2D{RealT}, - LinearScalarAdvectionEquation3D{RealT}, - HyperbolicDiffusionEquations1D{RealT}, - HyperbolicDiffusionEquations2D{RealT}, - HyperbolicDiffusionEquations3D{RealT}, - CompressibleEulerEquations1D{RealT}, - CompressibleEulerEquations2D{RealT}, - CompressibleEulerEquations3D{RealT}, - IdealGlmMhdEquations1D{RealT}, - IdealGlmMhdEquations2D{RealT}, - IdealGlmMhdEquations3D{RealT}, - LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}, - LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}, - ) - end - - function basis_type_dgsem(RealT, nnodes_) - LobattoLegendreBasis{RealT,nnodes_, - # VectorT - StaticArrays.SVector{nnodes_,RealT}, - # InverseVandermondeLegendre - Matrix{RealT}, - # BoundaryMatrix - #StaticArrays.SArray{Tuple{nnodes_,2},RealT,2,2*nnodes_}, - 
Matrix{RealT}, - # DerivativeMatrix - #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - Matrix{RealT}, - } - end - - function mortar_type_dgsem(RealT, nnodes_) - LobattoLegendreMortarL2{RealT,nnodes_, - # ForwardMatrix - #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - Matrix{RealT}, - # ReverseMatrix - # StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - Matrix{RealT}, - } - end - - function analyzer_type_dgsem(RealT, nnodes_) - polydeg = nnodes_ - 1 - nnodes_analysis = 2 * polydeg + 1 - LobattoLegendreAnalyzer{RealT,nnodes_analysis, - # VectorT - StaticArrays.SVector{nnodes_analysis,RealT}, - # Vandermonde - Array{RealT,2} - } - end - - function adaptor_type_dgsem(RealT, nnodes_) - LobattoLegendreAdaptorL2{RealT,nnodes_, - # ForwardMatrix - StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - # Matrix{RealT}, - # ReverseMatrix - StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - # Matrix{RealT}, - } - end - - # Constructors: mesh - for RealT in (Int, Float64,) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},RealT,RealT}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},Tuple{RealT},Tuple{RealT}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},Tuple{RealT,RealT},Tuple{RealT,RealT}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},Tuple{RealT,RealT,RealT},Tuple{RealT,RealT,RealT}}) - end - for TreeType in (SerialTree, ParallelTree), NDIMS in 1:3 - @assert Base.precompile(Tuple{typeof(Trixi.initialize!),TreeMesh{NDIMS,TreeType{NDIMS}},Int,Tuple{},Tuple{}}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{NDIMS,TreeType{NDIMS}},String,Int}) - end - - # Constructors: linear advection - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation1D},RealT}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D},RealT,RealT}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D},Tuple{RealT,RealT}}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D},RealT,RealT,RealT}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D},Tuple{RealT,RealT,RealT}}) - end - - # Constructors: hyperbolic diffusion - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations1D},}) - @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations2D},}) - @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations3D},}) - end - - # Constructors: Euler - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{CompressibleEulerEquations1D},RealT}) - @assert Base.precompile(Tuple{Type{CompressibleEulerEquations2D},RealT}) - @assert Base.precompile(Tuple{Type{CompressibleEulerEquations3D},RealT}) - end - - # Constructors: MHD - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations1D},RealT}) - @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations2D},RealT}) - @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations3D},RealT}) - end - - # Constructors: LBM - for RealT in (Float64,) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), 
Tuple{RealT, RealT}},Type{LatticeBoltzmannEquations2D}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), Tuple{RealT, Int}},Type{LatticeBoltzmannEquations2D}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), Tuple{RealT, RealT}},Type{LatticeBoltzmannEquations3D}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), Tuple{RealT, Int}},Type{LatticeBoltzmannEquations3D}}) - end - - # Constructors of the basis are inherently type-unstable since we pass integers - # and use their values as parameters of static arrays. - # Nevertheless, we can still precompile methods used to construct the bases. - Base.precompile(Tuple{Type{LobattoLegendreBasis},Int}) - for RealT in (Float64,) - Base.precompile(Tuple{Type{LobattoLegendreBasis},RealT,Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_dhat),Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_dsplit),Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.polynomial_derivative_matrix),Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.polynomial_interpolation_matrix),Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.barycentric_weights),Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_lhat),RealT,Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.lagrange_interpolating_polynomials),RealT,Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_q_and_l),Int,RealT}) - @assert Base.precompile(Tuple{typeof(Trixi.legendre_polynomial_and_derivative),Int,RealT}) - @assert Base.precompile(Tuple{typeof(Trixi.vandermonde_legendre),Vector{RealT}}) - end - @assert Base.precompile(Tuple{typeof(Trixi.gauss_lobatto_nodes_weights),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.gauss_nodes_weights),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_upper),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_lower),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper),Int,Val{:gauss}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower),Int,Val{:gauss}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper),Int,Val{:gauss_lobatto}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower),Int,Val{:gauss_lobatto}}) - - # Constructors: mortars, analyzers, adaptors - for RealT in (Float64,), polydeg in 1:7 - nnodes_ = polydeg + 1 - basis_type = basis_type_dgsem(RealT, nnodes_) - @assert Base.precompile(Tuple{typeof(Trixi.MortarL2),basis_type}) - @assert Base.precompile(Tuple{Type{Trixi.SolutionAnalyzer},basis_type}) - @assert Base.precompile(Tuple{Type{Trixi.AdaptorL2},basis_type}) - end - - # Constructors: callbacks - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:analysis_interval,),Tuple{Int}},Type{AliveCallback}}) - for RealT in (Float64,) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:cfl,),Tuple{RealT}},Type{StepsizeCallback}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:glm_scale, :cfl),Tuple{RealT,RealT}},Type{GlmSpeedCallback}}) - end - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :save_final_restart),Tuple{Int,Bool}},Type{SaveRestartCallback}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :save_initial_solution, :save_final_solution, 
:solution_variables),Tuple{Int,Bool,Bool,typeof(cons2cons)}},Type{SaveSolutionCallback}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :save_initial_solution, :save_final_solution, :solution_variables),Tuple{Int,Bool,Bool,typeof(cons2prim)}},Type{SaveSolutionCallback}}) - # TODO: AnalysisCallback? - # for RealT in (Float64,), polydeg in 1:7 - # nnodes_ = polydeg + 1 - # nnodes_analysis = 2*polydeg + 1 - # @assert Base.precompile(Tuple{Type{AnalysisCallback},RealT,Int,Bool,String,String,Trixi.LobattoLegendreAnalyzer{RealT,nnodes_analysis,Array{RealT,2}},Array{Symbol,1},Tuple{typeof(Trixi.entropy_timederivative),typeof(entropy)},StaticArrays.SArray{Tuple{1},RealT,1,1}}) - # We would need to use all special cases instead of - # Function,Trixi.AbstractVolumeIntegral - # for equations_type in equations_types(RealT) - # @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :extra_analysis_integrals),Tuple{Int,Tuple{typeof(entropy)}}},Type{AnalysisCallback},equations_type,DG{RealT,LobattoLegendreBasis{RealT,nnodes_,StaticArrays.SVector{nnodes_,RealT},Array{RealT,2},StaticArrays.SArray{Tuple{4,2},RealT,2,2*nnodes_},StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Trixi.LobattoLegendreMortarL2{RealT,nnodes_,StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Function,Trixi.AbstractVolumeIntegral}}) - # end - # end - @assert Base.precompile(Tuple{typeof(SummaryCallback)}) - @assert Base.precompile(Tuple{DiscreteCallback{typeof(Trixi.summary_callback), typeof(Trixi.summary_callback), typeof(Trixi.initialize_summary_callback), typeof(SciMLBase.FINALIZE_DEFAULT)}}) - @assert Base.precompile(Tuple{typeof(summary_box),Base.TTY,String,Vector{Pair{String, Any}}}) - # TODO: AMRCallback, ControllerThreeLevel, indicators - - # init_elements, interfaces, etc. 
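Both the removed code above and its reformatted replacement below rely on the same manual-precompilation idiom: bail out unless Julia is generating output, then assert each `Base.precompile` call. A self-contained sketch of that idiom; the module, type, and function names here are hypothetical, not part of Trixi.jl:

```julia
# Hypothetical stand-alone example of the manual precompile pattern;
# `DemoType` and `process` are illustrative names only.
module PrecompileDemo

struct DemoType{T}
    value::T
end

process(d::DemoType) = 2 * d.value

function _precompile_manual_()
    # Skip entirely unless Julia is currently generating a precompile image
    ccall(:jl_generating_output, Cint, ()) == 1 || return nothing

    # `Base.precompile` returns `false` if the signature no longer matches a
    # method, so `@assert` makes stale statements fail loudly.
    @assert Base.precompile(Tuple{typeof(process), DemoType{Float64}})

    return nothing
end

_precompile_manual_()

end # module
```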
- for RealT in (Float64,), polydeg in 1:7 - uEltype = RealT - nnodes_ = polydeg + 1 - mortar_type = mortar_type_dgsem(RealT, nnodes_) - - # 1D, serial - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{1,Trixi.SerialTree{1}},Trixi.ElementContainer1D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{1,Trixi.SerialTree{1}},Trixi.ElementContainer1D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{1,Trixi.SerialTree{1}},String}) - - # 2D, serial - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{2,Trixi.SerialTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{2,Trixi.SerialTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars),Array{Int,1},TreeMesh{2,Trixi.SerialTree{2}},Trixi.ElementContainer2D{RealT,uEltype},mortar_type}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{2,Trixi.SerialTree{2}},String}) - - # 2D, parallel - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype},mortar_type}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mpi_interfaces),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{2,Trixi.ParallelTree{2}},String}) - - # 3D, serial - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{3,Trixi.SerialTree{3}},Trixi.ElementContainer3D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{3,Trixi.SerialTree{3}},Trixi.ElementContainer3D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars),Array{Int,1},TreeMesh{3,Trixi.SerialTree{3}},Trixi.ElementContainer3D{RealT,uEltype},mortar_type}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{3,Trixi.SerialTree{3}},String}) - end - - # various `show` methods - for RealT in (Float64,) - # meshes - for NDIMS in 1:3 - # serial - @assert Base.precompile(Tuple{typeof(show),Base.TTY,TreeMesh{NDIMS,Trixi.SerialTree{NDIMS}}}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",TreeMesh{NDIMS,Trixi.SerialTree{NDIMS}}}) - # parallel - @assert Base.precompile(Tuple{typeof(show),Base.TTY,TreeMesh{NDIMS,Trixi.ParallelTree{NDIMS}}}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",TreeMesh{NDIMS,Trixi.ParallelTree{NDIMS}}}) - end + ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - # equations - for eq_type in equations_types(RealT) - @assert Base.precompile(Tuple{typeof(show),Base.TTY,eq_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",eq_type}) + function equations_types_1d(RealT) + (LinearScalarAdvectionEquation1D{RealT}, + HyperbolicDiffusionEquations1D{RealT}, + CompressibleEulerEquations1D{RealT}, + IdealGlmMhdEquations1D{RealT}) + end + function equations_types_2d(RealT) + 
(LinearScalarAdvectionEquation2D{RealT}, + HyperbolicDiffusionEquations2D{RealT}, + CompressibleEulerEquations2D{RealT}, + IdealGlmMhdEquations2D{RealT}, + LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}) + end + function equations_types_3d(RealT) + (LinearScalarAdvectionEquation3D{RealT}, + HyperbolicDiffusionEquations3D{RealT}, + CompressibleEulerEquations3D{RealT}, + IdealGlmMhdEquations3D{RealT}, + LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}) + end + function equations_types(RealT) + (LinearScalarAdvectionEquation1D{RealT}, + LinearScalarAdvectionEquation2D{RealT}, + LinearScalarAdvectionEquation3D{RealT}, + HyperbolicDiffusionEquations1D{RealT}, + HyperbolicDiffusionEquations2D{RealT}, + HyperbolicDiffusionEquations3D{RealT}, + CompressibleEulerEquations1D{RealT}, + CompressibleEulerEquations2D{RealT}, + CompressibleEulerEquations3D{RealT}, + IdealGlmMhdEquations1D{RealT}, + IdealGlmMhdEquations2D{RealT}, + IdealGlmMhdEquations3D{RealT}, + LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}, + LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}) end - # mortars, analyzers, adaptors, DG - for polydeg in 1:1 - nnodes_ = polydeg + 1 - basis_type = basis_type_dgsem(RealT, nnodes_) - mortar_type = mortar_type_dgsem(RealT, nnodes_) - analyzer_type = analyzer_type_dgsem(RealT, nnodes_) - adaptor_type = adaptor_type_dgsem(RealT, nnodes_) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,basis_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",basis_type}) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,mortar_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",mortar_type}) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,analyzer_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",analyzer_type}) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,adaptor_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",adaptor_type}) - - # we could also use more numerical fluxes and volume integral types here - @assert Base.precompile(Tuple{typeof(show),Base.TTY,DG{basis_type,mortar_type,typeof(flux_lax_friedrichs),VolumeIntegralWeakForm}}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",DG{basis_type,mortar_type,typeof(flux_lax_friedrichs),VolumeIntegralWeakForm}}) + function basis_type_dgsem(RealT, nnodes_) + LobattoLegendreBasis{RealT, nnodes_, + # VectorT + StaticArrays.SVector{nnodes_, RealT}, + # InverseVandermondeLegendre + Matrix{RealT}, + # BoundaryMatrix + #StaticArrays.SArray{Tuple{nnodes_,2},RealT,2,2*nnodes_}, + Matrix{RealT}, + # DerivativeMatrix + #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, + Matrix{RealT} + } end - # semidiscretizations - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",SemidiscretizationHyperbolic}) + function mortar_type_dgsem(RealT, nnodes_) + LobattoLegendreMortarL2{RealT, nnodes_, + # ForwardMatrix + #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, + Matrix{RealT}, + # ReverseMatrix + # StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, + Matrix{RealT} + } + end - # callbacks - summary_callback_type = DiscreteCallback{typeof(Trixi.summary_callback),typeof(Trixi.summary_callback),typeof(Trixi.initialize_summary_callback),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert 
Base.precompile(Tuple{typeof(show),Base.TTY,summary_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",summary_callback_type}) - @assert Base.precompile(Tuple{summary_callback_type,Base.TTY}) + function analyzer_type_dgsem(RealT, nnodes_) + polydeg = nnodes_ - 1 + nnodes_analysis = 2 * polydeg + 1 + LobattoLegendreAnalyzer{RealT, nnodes_analysis, + # VectorT + StaticArrays.SVector{nnodes_analysis, RealT}, + # Vandermonde + Array{RealT, 2} + } + end - # TODO: SteadyStateCallback, AnalysisCallback + function adaptor_type_dgsem(RealT, nnodes_) + LobattoLegendreAdaptorL2{RealT, nnodes_, + # ForwardMatrix + StaticArrays.SArray{Tuple{nnodes_, nnodes_}, RealT, 2, + nnodes_^2}, + # Matrix{RealT}, + # ReverseMatrix + StaticArrays.SArray{Tuple{nnodes_, nnodes_}, RealT, 2, + nnodes_^2} + # Matrix{RealT}, + } + end - alive_callback_type = DiscreteCallback{AliveCallback,AliveCallback,typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,alive_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",alive_callback_type}) + # Constructors: mesh + for RealT in (Int, Float64) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, RealT, + RealT}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, + Tuple{RealT}, Tuple{RealT}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, + Tuple{RealT, RealT}, Tuple{RealT, RealT}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, + Tuple{RealT, RealT, RealT}, Tuple{RealT, RealT, RealT + }}) + end + for TreeType in (SerialTree, ParallelTree), NDIMS in 1:3 + @assert Base.precompile(Tuple{typeof(Trixi.initialize!), + TreeMesh{NDIMS, TreeType{NDIMS}}, Int, Tuple{}, + Tuple{}}) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{NDIMS, TreeType{NDIMS}}, String, Int}) + end - restart_callback_type = DiscreteCallback{SaveRestartCallback,SaveRestartCallback,typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,restart_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",restart_callback_type}) + # Constructors: linear advection + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation1D}, RealT}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D}, RealT, RealT}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D}, + Tuple{RealT, RealT}}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D}, RealT, RealT, + RealT}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D}, + Tuple{RealT, RealT, RealT}}) + end - for solution_variables in (cons2cons, cons2prim) - save_solution_callback_type = DiscreteCallback{SaveSolutionCallback{typeof(solution_variables)},SaveSolutionCallback{typeof(solution_variables)},typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,save_solution_callback_type}) - @assert 
Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",save_solution_callback_type}) + # Constructors: hyperbolic diffusion + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations1D}}) + @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations2D}}) + @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations3D}}) end - # TODO: AMRCallback + # Constructors: Euler + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{CompressibleEulerEquations1D}, RealT}) + @assert Base.precompile(Tuple{Type{CompressibleEulerEquations2D}, RealT}) + @assert Base.precompile(Tuple{Type{CompressibleEulerEquations3D}, RealT}) + end - stepsize_callback_type = DiscreteCallback{StepsizeCallback{RealT},StepsizeCallback{RealT},typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,stepsize_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",stepsize_callback_type}) + # Constructors: MHD + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations1D}, RealT}) + @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations2D}, RealT}) + @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations3D}, RealT}) + end - glm_speed_callback_type = DiscreteCallback{GlmSpeedCallback{RealT},GlmSpeedCallback{RealT},typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,glm_speed_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",glm_speed_callback_type}) + # Constructors: LBM + for RealT in (Float64,) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, RealT}}, + Type{LatticeBoltzmannEquations2D}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, Int}}, + Type{LatticeBoltzmannEquations2D}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, RealT}}, + Type{LatticeBoltzmannEquations3D}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, Int}}, + Type{LatticeBoltzmannEquations3D}}) + end - lbm_collision_callback_type = DiscreteCallback{typeof(Trixi.lbm_collision_callback),typeof(Trixi.lbm_collision_callback),typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,lbm_collision_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",lbm_collision_callback_type}) + # Constructors of the basis are inherently type-unstable since we pass integers + # and use their values as parameters of static arrays. + # Nevertheless, we can still precompile methods used to construct the bases. 
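The three comment lines above describe a general Julia pitfall: a constructor whose type parameters come from argument *values* cannot be inferred from argument *types*. A minimal sketch of this, using a hypothetical `DemoBasis` unrelated to Trixi.jl's actual basis types:

```julia
using StaticArrays

struct DemoBasis{RealT, NNODES}
    nodes::SVector{NNODES, RealT}
end

function DemoBasis(RealT, polydeg::Integer)
    nnodes = polydeg + 1
    # The runtime value `nnodes` becomes the type parameter NNODES, so the
    # concrete return type cannot be inferred from the argument types alone;
    # the constructor itself stays type-unstable no matter how it is written.
    nodes = SVector{nnodes, RealT}(ntuple(i -> RealT(i), nnodes))
    return DemoBasis{RealT, nnodes}(nodes)
end

# Inference only knows `DemoBasis{Float64}` here (NNODES stays unknown):
basis = DemoBasis(Float64, 3)
```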
+ Base.precompile(Tuple{Type{LobattoLegendreBasis}, Int}) + for RealT in (Float64,) + Base.precompile(Tuple{Type{LobattoLegendreBasis}, RealT, Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_dhat), Vector{RealT}, Vector{RealT} + }) + @assert Base.precompile(Tuple{typeof(Trixi.calc_dsplit), Vector{RealT}, + Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.polynomial_derivative_matrix), + Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.polynomial_interpolation_matrix), + Vector{RealT}, Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.barycentric_weights), Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_lhat), RealT, Vector{RealT}, + Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.lagrange_interpolating_polynomials), + RealT, Vector{RealT}, Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_q_and_l), Int, RealT}) + @assert Base.precompile(Tuple{typeof(Trixi.legendre_polynomial_and_derivative), Int, + RealT}) + @assert Base.precompile(Tuple{typeof(Trixi.vandermonde_legendre), Vector{RealT}}) + end + @assert Base.precompile(Tuple{typeof(Trixi.gauss_lobatto_nodes_weights), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.gauss_nodes_weights), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_upper), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_lower), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper), Int, Val{:gauss}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower), Int, Val{:gauss}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper), Int, Val{:gauss_lobatto + }}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower), Int, Val{:gauss_lobatto + }}) + + # Constructors: mortars, analyzers, adaptors + for RealT in (Float64,), polydeg in 1:7 + nnodes_ = polydeg + 1 + basis_type = basis_type_dgsem(RealT, nnodes_) + @assert Base.precompile(Tuple{typeof(Trixi.MortarL2), basis_type}) + @assert Base.precompile(Tuple{Type{Trixi.SolutionAnalyzer}, basis_type}) + @assert Base.precompile(Tuple{Type{Trixi.AdaptorL2}, basis_type}) + end - # infrastructure, special elixirs - @assert Base.precompile(Tuple{typeof(trixi_include),String}) - end + # Constructors: callbacks + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:analysis_interval,), Tuple{Int}}, + Type{AliveCallback}}) + for RealT in (Float64,) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:cfl,), Tuple{RealT}}, + Type{StepsizeCallback}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:glm_scale, :cfl), Tuple{RealT, RealT}}, + Type{GlmSpeedCallback}}) + end + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:interval, :save_final_restart), + Tuple{Int, Bool}}, Type{SaveRestartCallback}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{ + (:interval, :save_initial_solution, + :save_final_solution, :solution_variables), + Tuple{Int, Bool, Bool, typeof(cons2cons)}}, + Type{SaveSolutionCallback}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{ + (:interval, :save_initial_solution, + :save_final_solution, :solution_variables), + Tuple{Int, Bool, Bool, typeof(cons2prim)}}, + Type{SaveSolutionCallback}}) + # TODO: AnalysisCallback? 
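The `Core.kwftype(...)`/`NamedTuple{...}` tuples in the statements above are how a keyword-argument call is written down as a precompile signature. A stripped-down sketch of the same pattern; `make_callback` is a hypothetical function, not Trixi.jl API:

```julia
# Illustrative only; `make_callback` is not part of Trixi.jl.
make_callback(; interval, verbose = false) = (interval = interval, verbose = verbose)

if ccall(:jl_generating_output, Cint, ()) == 1
    # A call like `make_callback(interval = 10)` is lowered to the keyword
    # sorter, whose signature reads: kw-function type, NamedTuple of the
    # keyword names and value types, then the called function itself.
    Base.precompile(Tuple{Core.kwftype(typeof(make_callback)),
                          NamedTuple{(:interval,), Tuple{Int}},
                          typeof(make_callback)})
end
```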
+ # for RealT in (Float64,), polydeg in 1:7 + # nnodes_ = polydeg + 1 + # nnodes_analysis = 2*polydeg + 1 + # @assert Base.precompile(Tuple{Type{AnalysisCallback},RealT,Int,Bool,String,String,Trixi.LobattoLegendreAnalyzer{RealT,nnodes_analysis,Array{RealT,2}},Array{Symbol,1},Tuple{typeof(Trixi.entropy_timederivative),typeof(entropy)},StaticArrays.SArray{Tuple{1},RealT,1,1}}) + # We would need to use all special cases instead of + # Function,Trixi.AbstractVolumeIntegral + # for equations_type in equations_types(RealT) + # @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :extra_analysis_integrals),Tuple{Int,Tuple{typeof(entropy)}}},Type{AnalysisCallback},equations_type,DG{RealT,LobattoLegendreBasis{RealT,nnodes_,StaticArrays.SVector{nnodes_,RealT},Array{RealT,2},StaticArrays.SArray{Tuple{4,2},RealT,2,2*nnodes_},StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Trixi.LobattoLegendreMortarL2{RealT,nnodes_,StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Function,Trixi.AbstractVolumeIntegral}}) + # end + # end + @assert Base.precompile(Tuple{typeof(SummaryCallback)}) + @assert Base.precompile(Tuple{ + DiscreteCallback{typeof(Trixi.summary_callback), + typeof(Trixi.summary_callback), + typeof(Trixi.initialize_summary_callback), + typeof(SciMLBase.FINALIZE_DEFAULT)}}) + @assert Base.precompile(Tuple{typeof(summary_box), Base.TTY, String, + Vector{Pair{String, Any}}}) + # TODO: AMRCallback, ControllerThreeLevel, indicators + + # init_elements, interfaces, etc. + for RealT in (Float64,), polydeg in 1:7 + uEltype = RealT + nnodes_ = polydeg + 1 + mortar_type = mortar_type_dgsem(RealT, nnodes_) + + # 1D, serial + @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{1, Trixi.SerialTree{1}}, + Trixi.ElementContainer1D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{1, Trixi.SerialTree{1}}, + Trixi.ElementContainer1D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{1, Trixi.SerialTree{1}}, String}) + + # 2D, serial + @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{2, Trixi.SerialTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{2, Trixi.SerialTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + TreeMesh{2, Trixi.SerialTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}, mortar_type + }) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{2, Trixi.SerialTree{2}}, String}) + + # 2D, parallel + @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}, mortar_type + }) + @assert Base.precompile(Tuple{typeof(Trixi.init_mpi_interfaces), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{2, Trixi.ParallelTree{2}}, String}) + + # 3D, serial + @assert 
Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{3, Trixi.SerialTree{3}}, + Trixi.ElementContainer3D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{3, Trixi.SerialTree{3}}, + Trixi.ElementContainer3D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + TreeMesh{3, Trixi.SerialTree{3}}, + Trixi.ElementContainer3D{RealT, uEltype}, mortar_type + }) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{3, Trixi.SerialTree{3}}, String}) + end - @assert Base.precompile(Tuple{typeof(init_mpi)}) - @assert Base.precompile(Tuple{typeof(init_p4est)}) + # various `show` methods + for RealT in (Float64,) + # meshes + for NDIMS in 1:3 + # serial + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + TreeMesh{NDIMS, Trixi.SerialTree{NDIMS}}}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", + TreeMesh{NDIMS, Trixi.SerialTree{NDIMS}}}) + # parallel + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + TreeMesh{NDIMS, Trixi.ParallelTree{NDIMS}}}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", + TreeMesh{NDIMS, Trixi.ParallelTree{NDIMS}}}) + end + + # equations + for eq_type in equations_types(RealT) + @assert Base.precompile(Tuple{typeof(show), Base.TTY, eq_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", eq_type}) + end + + # mortars, analyzers, adaptors, DG + for polydeg in 1:1 + nnodes_ = polydeg + 1 + basis_type = basis_type_dgsem(RealT, nnodes_) + mortar_type = mortar_type_dgsem(RealT, nnodes_) + analyzer_type = analyzer_type_dgsem(RealT, nnodes_) + adaptor_type = adaptor_type_dgsem(RealT, nnodes_) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, basis_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", basis_type}) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, mortar_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", mortar_type}) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, analyzer_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", analyzer_type}) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, adaptor_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", adaptor_type}) + + # we could also use more numerical fluxes and volume integral types here + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + DG{basis_type, mortar_type, + typeof(flux_lax_friedrichs), + VolumeIntegralWeakForm}}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", + DG{basis_type, mortar_type, + typeof(flux_lax_friedrichs), + VolumeIntegralWeakForm}}) + end + + # semidiscretizations + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + SemidiscretizationHyperbolic}) + + # callbacks + summary_callback_type = DiscreteCallback{typeof(Trixi.summary_callback), + typeof(Trixi.summary_callback), + typeof(Trixi.initialize_summary_callback), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, summary_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + summary_callback_type}) + @assert Base.precompile(Tuple{summary_callback_type, Base.TTY}) + + # TODO: SteadyStateCallback, AnalysisCallback + 
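The `summary_callback_type` alias above (and the callback aliases that follow) spell out all four type parameters of `DiscreteCallback`: the condition, the `affect!`, the initialization, and the finalization function types. A small sketch, assuming only SciMLBase, of how a concrete callback maps onto such an alias:

```julia
using SciMLBase

always(u, t, integrator) = true          # condition
do_nothing!(integrator) = nothing        # affect!

demo_callback = DiscreteCallback(always, do_nothing!)

# Same alias style as above: DiscreteCallback{condition, affect!,
# initialize, finalize}; SciMLBase fills in its defaults for the last two.
demo_callback_type = DiscreteCallback{typeof(always), typeof(do_nothing!),
                                      typeof(SciMLBase.INITIALIZE_DEFAULT),
                                      typeof(SciMLBase.FINALIZE_DEFAULT)}
@assert demo_callback isa demo_callback_type
```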
+ alive_callback_type = DiscreteCallback{AliveCallback, AliveCallback, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, alive_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + alive_callback_type}) + + restart_callback_type = DiscreteCallback{SaveRestartCallback, SaveRestartCallback, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, restart_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + restart_callback_type}) + + for solution_variables in (cons2cons, cons2prim) + save_solution_callback_type = DiscreteCallback{ + SaveSolutionCallback{ + typeof(solution_variables) + }, + SaveSolutionCallback{ + typeof(solution_variables) + }, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT) + } + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + save_solution_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", save_solution_callback_type}) + end + + # TODO: AMRCallback + + stepsize_callback_type = DiscreteCallback{StepsizeCallback{RealT}, + StepsizeCallback{RealT}, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, stepsize_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + stepsize_callback_type}) + + glm_speed_callback_type = DiscreteCallback{GlmSpeedCallback{RealT}, + GlmSpeedCallback{RealT}, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, glm_speed_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + glm_speed_callback_type}) + + lbm_collision_callback_type = DiscreteCallback{typeof(Trixi.lbm_collision_callback), + typeof(Trixi.lbm_collision_callback), + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, lbm_collision_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + lbm_collision_callback_type}) + + # infrastructure, special elixirs + @assert Base.precompile(Tuple{typeof(trixi_include), String}) + end - # The following precompile statements do not seem to be taken - # # `multiply_dimensionwise!` as used in the analysis callback - # for RealT in (Float64,) - # # 1D version - # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 2},Matrix{RealT},SubArray{RealT, 2, Array{RealT, 3}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true}}) - # # 2D version - # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 3},Matrix{RealT},SubArray{RealT, 3, Array{RealT, 4}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 3}}) - # # 3D version - # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 4},Matrix{RealT},SubArray{RealT, 4, Array{RealT, 5}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 4},Array{RealT, 4}}) - # end + @assert Base.precompile(Tuple{typeof(init_mpi)}) + @assert Base.precompile(Tuple{typeof(init_p4est)}) + + # The following precompile statements do not 
seem to be taken + # # `multiply_dimensionwise!` as used in the analysis callback + # for RealT in (Float64,) + # # 1D version + # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 2},Matrix{RealT},SubArray{RealT, 2, Array{RealT, 3}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true}}) + # # 2D version + # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 3},Matrix{RealT},SubArray{RealT, 3, Array{RealT, 4}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 3}}) + # # 3D version + # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 4},Matrix{RealT},SubArray{RealT, 4, Array{RealT, 5}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 4},Array{RealT, 4}}) + # end - return nothing + return nothing end - # Explicit precompilation running code only on Julia v1.9 and newer using PrecompileTools: @setup_workload, @compile_workload @static if VERSION >= v"1.9.0-beta4" - @setup_workload begin - # Setup code can go here - - @compile_workload begin - # Everything inside this block will run at precompile time, saving the - # binary code to a cache in newer versions of Julia. - DGSEM(3) + @setup_workload begin + # Setup code can go here + + @compile_workload begin + # Everything inside this block will run at precompile time, saving the + # binary code to a cache in newer versions of Julia. + DGSEM(3) + end end - end end diff --git a/src/auxiliary/special_elixirs.jl b/src/auxiliary/special_elixirs.jl index 0724c62bcba..da73b42e572 100644 --- a/src/auxiliary/special_elixirs.jl +++ b/src/auxiliary/special_elixirs.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Note: We can't call the method below `Trixi.include` since that is created automatically # inside `module Trixi` to `include` source files and evaluate them within the global scope @@ -36,15 +36,16 @@ julia> redirect_stdout(devnull) do ``` """ function trixi_include(mod::Module, elixir::AbstractString; kwargs...) - # Print information on potential wait time only in non-parallel case - if !mpi_isparallel() - @info "You just called `trixi_include`. Julia may now compile the code, please be patient." - end - Base.include(ex -> replace_assignments(insert_maxiters(ex); kwargs...), mod, elixir) + # Print information on potential wait time only in non-parallel case + if !mpi_isparallel() + @info "You just called `trixi_include`. Julia may now compile the code, please be patient." + end + Base.include(ex -> replace_assignments(insert_maxiters(ex); kwargs...), mod, elixir) end -trixi_include(elixir::AbstractString; kwargs...) = trixi_include(Main, elixir; kwargs...) - +function trixi_include(elixir::AbstractString; kwargs...) + trixi_include(Main, elixir; kwargs...) +end """ convergence_test([mod::Module=Main,] elixir::AbstractString, iterations; kwargs...) @@ -60,94 +61,97 @@ This function assumes that the spatial resolution is set via the keywords integers, one per spatial dimension). """ function convergence_test(mod::Module, elixir::AbstractString, iterations; kwargs...) 
- @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") + @assert(iterations>1, + "Number of iterations must be bigger than 1 for a convergence analysis") - # Types of errors to be calculated - errors = Dict(:l2 => Float64[], :linf => Float64[]) + # Types of errors to be calculated + errors = Dict(:l2 => Float64[], :linf => Float64[]) - initial_resolution = extract_initial_resolution(elixir, kwargs) + initial_resolution = extract_initial_resolution(elixir, kwargs) - # run simulations and extract errors - for iter in 1:iterations - println("Running convtest iteration ", iter, "/", iterations) + # run simulations and extract errors + for iter in 1:iterations + println("Running convtest iteration ", iter, "/", iterations) - include_refined(mod, elixir, initial_resolution, iter; kwargs) + include_refined(mod, elixir, initial_resolution, iter; kwargs) - l2_error, linf_error = mod.analysis_callback(mod.sol) + l2_error, linf_error = mod.analysis_callback(mod.sol) - # collect errors as one vector to reshape later - append!(errors[:l2], l2_error) - append!(errors[:linf], linf_error) + # collect errors as one vector to reshape later + append!(errors[:l2], l2_error) + append!(errors[:linf], linf_error) - println("\n\n") - println("#"^100) - end + println("\n\n") + println("#"^100) + end - # number of variables - _, equations, _, _ = mesh_equations_solver_cache(mod.semi) - variablenames = varnames(cons2cons, equations) - nvariables = length(variablenames) + # number of variables + _, equations, _, _ = mesh_equations_solver_cache(mod.semi) + variablenames = varnames(cons2cons, equations) + nvariables = length(variablenames) - # Reshape errors to get a matrix where the i-th row represents the i-th iteration - # and the j-th column represents the j-th variable - errorsmatrix = Dict(kind => transpose(reshape(error, (nvariables, iterations))) for (kind, error) in errors) + # Reshape errors to get a matrix where the i-th row represents the i-th iteration + # and the j-th column represents the j-th variable + errorsmatrix = Dict(kind => transpose(reshape(error, (nvariables, iterations))) + for (kind, error) in errors) - # Calculate EOCs where the columns represent the variables - # As dx halves in every iteration the denominator needs to be log(1/2) - eocs = Dict(kind => log.(error[2:end, :] ./ error[1:end-1, :]) ./ log(1 / 2) for (kind, error) in errorsmatrix) + # Calculate EOCs where the columns represent the variables + # As dx halves in every iteration the denominator needs to be log(1/2) + eocs = Dict(kind => log.(error[2:end, :] ./ error[1:(end - 1), :]) ./ log(1 / 2) + for (kind, error) in errorsmatrix) - eoc_mean_values = Dict{Symbol,Any}() - eoc_mean_values[:variables] = variablenames + eoc_mean_values = Dict{Symbol, Any}() + eoc_mean_values[:variables] = variablenames - for (kind, error) in errorsmatrix - println(kind) + for (kind, error) in errorsmatrix + println(kind) - for v in variablenames - @printf("%-20s", v) - end - println("") + for v in variablenames + @printf("%-20s", v) + end + println("") - for k = 1:nvariables - @printf("%-10s", "error") - @printf("%-10s", "EOC") - end - println("") + for k in 1:nvariables + @printf("%-10s", "error") + @printf("%-10s", "EOC") + end + println("") - # Print errors for the first iteration - for k = 1:nvariables - @printf("%-10.2e", error[1, k]) - @printf("%-10s", "-") - end - println("") - - # For the following iterations print errors and EOCs - for j = 2:iterations - for k = 1:nvariables - @printf("%-10.2e", 
error[j, k]) - @printf("%-10.2f", eocs[kind][j-1, k]) - end - println("") - end - println("") - - # Print mean EOCs - mean_values = zeros(nvariables) - for v in 1:nvariables - mean_values[v] = sum(eocs[kind][:, v]) ./ length(eocs[kind][:, v]) - @printf("%-10s", "mean") - @printf("%-10.2f", mean_values[v]) + # Print errors for the first iteration + for k in 1:nvariables + @printf("%-10.2e", error[1, k]) + @printf("%-10s", "-") + end + println("") + + # For the following iterations print errors and EOCs + for j in 2:iterations + for k in 1:nvariables + @printf("%-10.2e", error[j, k]) + @printf("%-10.2f", eocs[kind][j - 1, k]) + end + println("") + end + println("") + + # Print mean EOCs + mean_values = zeros(nvariables) + for v in 1:nvariables + mean_values[v] = sum(eocs[kind][:, v]) ./ length(eocs[kind][:, v]) + @printf("%-10s", "mean") + @printf("%-10.2f", mean_values[v]) + end + eoc_mean_values[kind] = mean_values + println("") + println("-"^100) end - eoc_mean_values[kind] = mean_values - println("") - println("-"^100) - end - return eoc_mean_values + return eoc_mean_values end -convergence_test(elixir::AbstractString, iterations; kwargs...) = convergence_test(Main, elixir::AbstractString, iterations; kwargs...) - - +function convergence_test(elixir::AbstractString, iterations; kwargs...) + convergence_test(Main, elixir::AbstractString, iterations; kwargs...) +end # Helper methods used in the functions defined above @@ -158,130 +162,134 @@ walkexpr(f, x) = f(x) # Insert the keyword argument `maxiters` into calls to `solve` and `Trixi.solve` # with default value `10^5` if it is not already present. function insert_maxiters(expr) - maxiters_default = 10^5 - - expr = walkexpr(expr) do x - if x isa Expr - is_plain_solve = x.head === Symbol("call") && x.args[1] === Symbol("solve") - is_trixi_solve = (x.head === Symbol("call") && x.args[1] isa Expr && - x.args[1].head === Symbol(".") && - x.args[1].args[1] === Symbol("Trixi") && - x.args[1].args[2] isa QuoteNode && - x.args[1].args[2].value === Symbol("solve")) - - if is_plain_solve || is_trixi_solve - # Do nothing if `maxiters` is already set as keyword argument... - for arg in x.args - # This detects the case where `maxiters` is set as keyword argument - # without or before a semicolon - if (arg isa Expr && arg.head === Symbol("kw") && arg.args[1] === Symbol("maxiters")) - return x - end - - # This detects the case where maxiters is set as keyword argument - # after a semicolon - if (arg isa Expr && arg.head === Symbol("parameters")) - # We need to check each keyword argument listed here - for nested_arg in arg.args - if (nested_arg isa Expr && nested_arg.head === Symbol("kw") && - nested_arg.args[1] === Symbol("maxiters")) - return x - end + maxiters_default = 10^5 + + expr = walkexpr(expr) do x + if x isa Expr + is_plain_solve = x.head === Symbol("call") && x.args[1] === Symbol("solve") + is_trixi_solve = (x.head === Symbol("call") && x.args[1] isa Expr && + x.args[1].head === Symbol(".") && + x.args[1].args[1] === Symbol("Trixi") && + x.args[1].args[2] isa QuoteNode && + x.args[1].args[2].value === Symbol("solve")) + + if is_plain_solve || is_trixi_solve + # Do nothing if `maxiters` is already set as keyword argument... 
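+                # For reference, `:(solve(ode, maxiters = 10))` parses the keyword as
+                # Expr(:kw, :maxiters, 10) directly among `x.args`, whereas
+                # `:(solve(ode; maxiters = 10))` wraps it inside an Expr(:parameters, ...)
+                # entry; the two branches below handle exactly these two cases.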
+ for arg in x.args + # This detects the case where `maxiters` is set as keyword argument + # without or before a semicolon + if (arg isa Expr && arg.head === Symbol("kw") && + arg.args[1] === Symbol("maxiters")) + return x + end + + # This detects the case where maxiters is set as keyword argument + # after a semicolon + if (arg isa Expr && arg.head === Symbol("parameters")) + # We need to check each keyword argument listed here + for nested_arg in arg.args + if (nested_arg isa Expr && + nested_arg.head === Symbol("kw") && + nested_arg.args[1] === Symbol("maxiters")) + return x + end + end + end + end + + # ...and insert it otherwise. + push!(x.args, Expr(Symbol("kw"), Symbol("maxiters"), maxiters_default)) end - end end - return x end - return expr + return x end - return expr + return expr end # Replace assignments to `key` in `expr` by `key = val` for all `(key,val)` in `kwargs`. function replace_assignments(expr; kwargs...) - # replace explicit and keyword assignments - expr = walkexpr(expr) do x - if x isa Expr - for (key,val) in kwargs - if (x.head === Symbol("=") || x.head === :kw) && x.args[1] === Symbol(key) - x.args[2] = :( $val ) - # dump(x) + # replace explicit and keyword assignments + expr = walkexpr(expr) do x + if x isa Expr + for (key, val) in kwargs + if (x.head === Symbol("=") || x.head === :kw) && + x.args[1] === Symbol(key) + x.args[2] = :($val) + # dump(x) + end + end end - end + return x end - return x - end - return expr + return expr end # find a (keyword or common) assignment to `destination` in `expr` # and return the assigned value function find_assignment(expr, destination) - # declare result to be able to assign to it in the closure - local result - - # find explicit and keyword assignments - walkexpr(expr) do x - if x isa Expr - if (x.head === Symbol("=") || x.head === :kw) && x.args[1] === Symbol(destination) - result = x.args[2] - # dump(x) - end + # declare result to be able to assign to it in the closure + local result + + # find explicit and keyword assignments + walkexpr(expr) do x + if x isa Expr + if (x.head === Symbol("=") || x.head === :kw) && + x.args[1] === Symbol(destination) + result = x.args[2] + # dump(x) + end + end + return x end - return x - end - result + result end # searches the parameter that specifies the mesh resolution in the elixir function extract_initial_resolution(elixir, kwargs) - code = read(elixir, String) - expr = Meta.parse("begin \n$code \nend") + code = read(elixir, String) + expr = Meta.parse("begin \n$code \nend") - try - # get the initial_refinement_level from the elixir - initial_refinement_level = find_assignment(expr, :initial_refinement_level) + try + # get the initial_refinement_level from the elixir + initial_refinement_level = find_assignment(expr, :initial_refinement_level) - if haskey(kwargs, :initial_refinement_level) - return kwargs[:initial_refinement_level] - else - return initial_refinement_level - end - catch e - if isa(e, UndefVarError) - # get cells_per_dimension from the elixir - cells_per_dimension = eval(find_assignment(expr, :cells_per_dimension)) - - if haskey(kwargs, :cells_per_dimension) - return kwargs[:cells_per_dimension] - else - return cells_per_dimension - end - else - throw(e) + if haskey(kwargs, :initial_refinement_level) + return kwargs[:initial_refinement_level] + else + return initial_refinement_level + end + catch e + if isa(e, UndefVarError) + # get cells_per_dimension from the
elixir + cells_per_dimension = eval(find_assignment(expr, :cells_per_dimension)) + + if haskey(kwargs, :cells_per_dimension) + return kwargs[:cells_per_dimension] + else + return cells_per_dimension + end + else + throw(e) + end end - end end # runs the specified elixir with a doubled resolution each time iter is increased by 1 # works for TreeMesh function include_refined(mod, elixir, initial_refinement_level::Int, iter; kwargs) - trixi_include(mod, elixir; kwargs..., initial_refinement_level=initial_refinement_level+iter-1) + trixi_include(mod, elixir; kwargs..., + initial_refinement_level = initial_refinement_level + iter - 1) end # runs the specified elixir with a doubled resolution each time iter is increased by 1 # works for StructuredMesh -function include_refined(mod, elixir, cells_per_dimension::NTuple{NDIMS, Int}, iter; kwargs) where {NDIMS} - new_cells_per_dimension = cells_per_dimension .* 2^(iter - 1) +function include_refined(mod, elixir, cells_per_dimension::NTuple{NDIMS, Int}, iter; + kwargs) where {NDIMS} + new_cells_per_dimension = cells_per_dimension .* 2^(iter - 1) - trixi_include(mod, elixir; kwargs..., cells_per_dimension=new_cells_per_dimension) + trixi_include(mod, elixir; kwargs..., cells_per_dimension = new_cells_per_dimension) end - - end # @muladd diff --git a/src/basic_types.jl b/src/basic_types.jl index 4539e26dea3..ee479a62039 100644 --- a/src/basic_types.jl +++ b/src/basic_types.jl @@ -3,14 +3,13 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # abstract supertype of specific semidiscretizations such as # - SemidiscretizationHyperbolic for hyperbolic conservation laws # - SemidiscretizationEulerGravity for Euler with self-gravity abstract type AbstractSemidiscretization end - """ AbstractEquations{NDIMS, NVARS} @@ -20,7 +19,6 @@ number of primary variables (`NVARS`) of the physics model. """ abstract type AbstractEquations{NDIMS, NVARS} end - """ AbstractMesh{NDIMS} @@ -29,36 +27,30 @@ The type parameters encode the number of spatial dimensions (`NDIMS`). """ abstract type AbstractMesh{NDIMS} end - # abstract supertype of specific SBP bases such as a Lobatto-Legendre nodal basis -abstract type AbstractBasisSBP{RealT<:Real} end - +abstract type AbstractBasisSBP{RealT <: Real} end # abstract supertype of mortar methods, e.g. using L² projections -abstract type AbstractMortar{RealT<:Real} end +abstract type AbstractMortar{RealT <: Real} end # abstract supertype of mortar methods using L² projection # which will be specialized for different SBP bases -abstract type AbstractMortarL2{RealT<:Real} <: AbstractMortar{RealT} end - +abstract type AbstractMortarL2{RealT <: Real} <: AbstractMortar{RealT} end # abstract supertype of functionality related to the analysis of # numerical solutions, e.g. the calculation of errors -abstract type SolutionAnalyzer{RealT<:Real} end - +abstract type SolutionAnalyzer{RealT <: Real} end # abstract supertype of grid-transfer methods used for AMR, # e.g. refinement and coarsening based on L² projections -abstract type AdaptorAMR{RealT<:Real} end +abstract type AdaptorAMR{RealT <: Real} end # abstract supertype of AMR grid-transfer operations using L² projections # which will be specialized for different SBP bases -abstract type AdaptorL2{RealT<:Real} <: AdaptorAMR{RealT} end - +abstract type AdaptorL2{RealT <: Real} <: AdaptorAMR{RealT} end # TODO: Taal decide, which abstract types shall be defined here? 
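Each `@muladd begin` block touched by this diff gains a `#! format: noindent` directive on its first line; this JuliaFormatter directive keeps the block body flush left instead of indenting the whole file body by one level. A minimal sketch of the pattern outside Trixi.jl, assuming only MuladdMacro:

```julia
using MuladdMacro: @muladd

@muladd begin
#! format: noindent

# Thanks to the directive on the first line of the block, JuliaFormatter
# keeps this definition flush left instead of indenting it by four spaces;
# @muladd still rewrites `a * b + c` into a fused `muladd(a, b, c)`.
fused_demo(a, b, c) = a * b + c
end # @muladd
```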
-
 struct BoundaryConditionPeriodic end
 """
@@ -68,28 +60,30 @@ A singleton struct indicating periodic boundary conditions.
 """
 const boundary_condition_periodic = BoundaryConditionPeriodic()
-Base.show(io::IO, ::BoundaryConditionPeriodic) = print(io, "boundary_condition_periodic")
-
+function Base.show(io::IO, ::BoundaryConditionPeriodic)
+ print(io, "boundary_condition_periodic")
+end
 struct BoundaryConditionDoNothing end
 # This version can be called by hyperbolic solvers on logically Cartesian meshes
-@inline function (::BoundaryConditionDoNothing)(
- u_inner, orientation_or_normal_direction, direction::Integer, x, t, surface_flux, equations)
-
- return flux(u_inner, orientation_or_normal_direction, equations)
+@inline function (::BoundaryConditionDoNothing)(u_inner,
+ orientation_or_normal_direction,
+ direction::Integer, x, t, surface_flux,
+ equations)
+ return flux(u_inner, orientation_or_normal_direction, equations)
 end
 # This version can be called by hyperbolic solvers on unstructured, curved meshes
-@inline function (::BoundaryConditionDoNothing)(u_inner, outward_direction::AbstractVector,
+@inline function (::BoundaryConditionDoNothing)(u_inner,
+ outward_direction::AbstractVector,
 x, t, surface_flux, equations)
-
- return flux(u_inner, outward_direction, equations)
+ return flux(u_inner, outward_direction, equations)
 end
 # This version can be called by parabolic solvers
 @inline function (::BoundaryConditionDoNothing)(inner_flux_or_state, other_args...)
- return inner_flux_or_state
+ return inner_flux_or_state
 end
 """
@@ -99,6 +93,7 @@ Imposing no boundary condition just evaluates the flux at the inner state.
 """
 const boundary_condition_do_nothing = BoundaryConditionDoNothing()
-Base.show(io::IO, ::BoundaryConditionDoNothing) = print(io, "boundary_condition_do_nothing")
-
+function Base.show(io::IO, ::BoundaryConditionDoNothing)
+ print(io, "boundary_condition_do_nothing")
+end
 end # @muladd
diff --git a/src/callbacks_stage/callbacks_stage.jl b/src/callbacks_stage/callbacks_stage.jl
index f23f96eccf8..7609f9b341d 100644
--- a/src/callbacks_stage/callbacks_stage.jl
+++ b/src/callbacks_stage/callbacks_stage.jl
@@ -3,9 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent
 include("positivity_zhang_shu.jl")
-
-
 end # @muladd
diff --git a/src/callbacks_stage/positivity_zhang_shu.jl b/src/callbacks_stage/positivity_zhang_shu.jl
index c3156ae4833..92141c4b26e 100644
--- a/src/callbacks_stage/positivity_zhang_shu.jl
+++ b/src/callbacks_stage/positivity_zhang_shu.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent
 """
 PositivityPreservingLimiterZhangShu(; thresholds, variables)
@@ -18,24 +18,26 @@ using the associated `thresholds` to determine the minimal acceptable values.
 The order of the `variables` is important and might have a strong influence on
 the robustness.
""" -struct PositivityPreservingLimiterZhangShu{N, Thresholds<:NTuple{N,<:Real}, Variables<:NTuple{N,Any}} - thresholds::Thresholds - variables::Variables +struct PositivityPreservingLimiterZhangShu{N, Thresholds <: NTuple{N, <:Real}, + Variables <: NTuple{N, Any}} + thresholds::Thresholds + variables::Variables end function PositivityPreservingLimiterZhangShu(; thresholds, variables) - PositivityPreservingLimiterZhangShu(thresholds, variables) + PositivityPreservingLimiterZhangShu(thresholds, variables) end - -function (limiter!::PositivityPreservingLimiterZhangShu)( - u_ode, integrator, semi::AbstractSemidiscretization, t) - u = wrap_array(u_ode, semi) - @trixi_timeit timer() "positivity-preserving limiter" limiter_zhang_shu!( - u, limiter!.thresholds, limiter!.variables, mesh_equations_solver_cache(semi)...) +function (limiter!::PositivityPreservingLimiterZhangShu)(u_ode, integrator, + semi::AbstractSemidiscretization, + t) + u = wrap_array(u_ode, semi) + @trixi_timeit timer() "positivity-preserving limiter" begin + limiter_zhang_shu!(u, limiter!.thresholds, limiter!.variables, + mesh_equations_solver_cache(semi)...) + end end - # Iterate over tuples in a type-stable way using "lispy tuple programming", # similar to https://stackoverflow.com/a/55849398: # Iterating over tuples of different functions isn't type-stable in general @@ -44,28 +46,26 @@ end # Note that you shouldn't use this with too many elements per tuple since the # compile times can increase otherwise - but a handful of elements per tuple # is definitely fine. -function limiter_zhang_shu!(u, thresholds::NTuple{N,<:Real}, variables::NTuple{N,Any}, +function limiter_zhang_shu!(u, thresholds::NTuple{N, <:Real}, variables::NTuple{N, Any}, mesh, equations, solver, cache) where {N} - threshold = first(thresholds) - remaining_thresholds = Base.tail(thresholds) - variable = first(variables) - remaining_variables = Base.tail(variables) + threshold = first(thresholds) + remaining_thresholds = Base.tail(thresholds) + variable = first(variables) + remaining_variables = Base.tail(variables) - limiter_zhang_shu!(u, threshold, variable, mesh, equations, solver, cache) - limiter_zhang_shu!(u, remaining_thresholds, remaining_variables, mesh, equations, solver, cache) - return nothing + limiter_zhang_shu!(u, threshold, variable, mesh, equations, solver, cache) + limiter_zhang_shu!(u, remaining_thresholds, remaining_variables, mesh, equations, + solver, cache) + return nothing end # terminate the type-stable iteration over tuples function limiter_zhang_shu!(u, thresholds::Tuple{}, variables::Tuple{}, mesh, equations, solver, cache) - nothing + nothing end - include("positivity_zhang_shu_dg1d.jl") include("positivity_zhang_shu_dg2d.jl") include("positivity_zhang_shu_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_stage/positivity_zhang_shu_dg1d.jl b/src/callbacks_stage/positivity_zhang_shu_dg1d.jl index 50d6b3f2c31..7797eb95b09 100644 --- a/src/callbacks_stage/positivity_zhang_shu_dg1d.jl +++ b/src/callbacks_stage/positivity_zhang_shu_dg1d.jl @@ -3,45 +3,43 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function limiter_zhang_shu!(u, threshold::Real, variable, mesh::AbstractMesh{1}, equations, dg::DGSEM, cache) - @unpack weights = dg.basis - - @threaded for element in eachelement(dg, cache) - # determine minimum value - value_min = typemax(eltype(u)) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - value_min = min(value_min, variable(u_node, equations)) - end - - # detect if limiting is necessary - value_min < threshold || continue - - # compute mean value - u_mean = zero(get_node_vars(u, equations, dg, 1, element)) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - u_mean += u_node * weights[i] + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, element)) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + u_mean += u_node * weights[i] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, element) + end end - # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 - u_mean = u_mean / 2^ndims(mesh) - # We compute the value directly with the mean values, as we assume that - # Jensen's inequality holds (e.g. pressure for compressible Euler equations). - value_mean = variable(u_mean, equations) - theta = (value_mean - threshold) / (value_mean - value_min) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - set_node_vars!(u, theta * u_node + (1-theta) * u_mean, - equations, dg, i, element) - end - end - - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_stage/positivity_zhang_shu_dg2d.jl b/src/callbacks_stage/positivity_zhang_shu_dg2d.jl index ae5b7371920..b37ed9c49d5 100644 --- a/src/callbacks_stage/positivity_zhang_shu_dg2d.jl +++ b/src/callbacks_stage/positivity_zhang_shu_dg2d.jl @@ -3,45 +3,43 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function limiter_zhang_shu!(u, threshold::Real, variable, mesh::AbstractMesh{2}, equations, dg::DGSEM, cache) - @unpack weights = dg.basis - - @threaded for element in eachelement(dg, cache) - # determine minimum value - value_min = typemax(eltype(u)) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - value_min = min(value_min, variable(u_node, equations)) - end - - # detect if limiting is necessary - value_min < threshold || continue - - # compute mean value - u_mean = zero(get_node_vars(u, equations, dg, 1, 1, element)) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - u_mean += u_node * weights[i] * weights[j] + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, 1, element)) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + u_mean += u_node * weights[i] * weights[j] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, j, element) + end end - # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 - u_mean = u_mean / 2^ndims(mesh) - # We compute the value directly with the mean values, as we assume that - # Jensen's inequality holds (e.g. pressure for compressible Euler equations). - value_mean = variable(u_mean, equations) - theta = (value_mean - threshold) / (value_mean - value_min) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - set_node_vars!(u, theta * u_node + (1-theta) * u_mean, - equations, dg, i, j, element) - end - end - - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_stage/positivity_zhang_shu_dg3d.jl b/src/callbacks_stage/positivity_zhang_shu_dg3d.jl index d2e46dc7d88..773a236d831 100644 --- a/src/callbacks_stage/positivity_zhang_shu_dg3d.jl +++ b/src/callbacks_stage/positivity_zhang_shu_dg3d.jl @@ -3,45 +3,43 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function limiter_zhang_shu!(u, threshold::Real, variable, mesh::AbstractMesh{3}, equations, dg::DGSEM, cache) - @unpack weights = dg.basis - - @threaded for element in eachelement(dg, cache) - # determine minimum value - value_min = typemax(eltype(u)) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - value_min = min(value_min, variable(u_node, equations)) - end - - # detect if limiting is necessary - value_min < threshold || continue - - # compute mean value - u_mean = zero(get_node_vars(u, equations, dg, 1, 1, 1, element)) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - u_mean += u_node * weights[i] * weights[j] * weights[k] + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, 1, 1, element)) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + u_mean += u_node * weights[i] * weights[j] * weights[k] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, j, k, element) + end end - # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 - u_mean = u_mean / 2^ndims(mesh) - # We compute the value directly with the mean values, as we assume that - # Jensen's inequality holds (e.g. pressure for compressible Euler equations). - value_mean = variable(u_mean, equations) - theta = (value_mean - threshold) / (value_mean - value_min) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - set_node_vars!(u, theta * u_node + (1-theta) * u_mean, - equations, dg, i, j, k, element) - end - end - - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_step/alive.jl b/src/callbacks_step/alive.jl index 1417dc3bef7..eeacd9681d8 100644 --- a/src/callbacks_step/alive.jl +++ b/src/callbacks_step/alive.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ AliveCallback(analysis_interval=0, alive_interval=analysis_interval÷10) @@ -14,91 +14,85 @@ time steps. If `analysis_interval ≂̸ 0`, the output is omitted every `analysis_interval` time steps. 
""" mutable struct AliveCallback - start_time::Float64 - alive_interval::Int - analysis_interval::Int + start_time::Float64 + alive_interval::Int + analysis_interval::Int end -function AliveCallback(; analysis_interval=0, - alive_interval=analysis_interval÷10) - - alive_callback = AliveCallback(0.0, alive_interval, analysis_interval) +function AliveCallback(; analysis_interval = 0, + alive_interval = analysis_interval ÷ 10) + alive_callback = AliveCallback(0.0, alive_interval, analysis_interval) - DiscreteCallback(alive_callback, alive_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + DiscreteCallback(alive_callback, alive_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:AliveCallback}) - @nospecialize cb # reduce precompilation time - - alive_callback = cb.affect! - print(io, "AliveCallback(alive_interval=", alive_callback.alive_interval, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AliveCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else alive_callback = cb.affect! - - setup = [ - "interval" => alive_callback.alive_interval, - ] - summary_box(io, "AliveCallback", setup) - end + print(io, "AliveCallback(alive_interval=", alive_callback.alive_interval, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AliveCallback}) + @nospecialize cb # reduce precompilation time + if get(io, :compact, false) + show(io, cb) + else + alive_callback = cb.affect! -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:AliveCallback} - - alive_callback = cb.affect! - alive_callback.start_time = time_ns() - return nothing + setup = [ + "interval" => alive_callback.alive_interval, + ] + summary_box(io, "AliveCallback", setup) + end end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: AliveCallback} + alive_callback = cb.affect! + alive_callback.start_time = time_ns() + return nothing +end # this method is called to determine whether the callback should be activated function (alive_callback::AliveCallback)(u, t, integrator) - @unpack alive_interval, analysis_interval = alive_callback - - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return alive_interval > 0 && ( - (integrator.stats.naccept % alive_interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0) && - (analysis_interval == 0 || integrator.stats.naccept % analysis_interval != 0)) || - isfinished(integrator)) + @unpack alive_interval, analysis_interval = alive_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. 
+ return alive_interval > 0 && ((integrator.stats.naccept % alive_interval == 0 && + !(integrator.stats.naccept == 0 && integrator.iter > 0) && + (analysis_interval == 0 || + integrator.stats.naccept % analysis_interval != 0)) || + isfinished(integrator)) end - # this method is called when the callback is activated function (alive_callback::AliveCallback)(integrator) - # Checking for floating point equality is OK here as `DifferentialEquations.jl` - # sets the time exactly to the final time in the last iteration - if isfinished(integrator) && mpi_isroot() - println("─"^100) - println("Trixi.jl simulation finished. Final time: ", integrator.t, - " Time steps: ", integrator.stats.naccept, " (accepted), ", integrator.iter, " (total)") - println("─"^100) - println() - elseif mpi_isroot() - runtime_absolute = 1.0e-9 * (time_ns() - alive_callback.start_time) - @printf("#timesteps: %6d │ Δt: %.4e │ sim. time: %.4e │ run time: %.4e s\n", - integrator.stats.naccept, integrator.dt, integrator.t, runtime_absolute) - end - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # Checking for floating point equality is OK here as `DifferentialEquations.jl` + # sets the time exactly to the final time in the last iteration + if isfinished(integrator) && mpi_isroot() + println("─"^100) + println("Trixi.jl simulation finished. Final time: ", integrator.t, + " Time steps: ", integrator.stats.naccept, " (accepted), ", + integrator.iter, " (total)") + println("─"^100) + println() + elseif mpi_isroot() + runtime_absolute = 1.0e-9 * (time_ns() - alive_callback.start_time) + @printf("#timesteps: %6d │ Δt: %.4e │ sim. time: %.4e │ run time: %.4e s\n", + integrator.stats.naccept, integrator.dt, integrator.t, runtime_absolute) + end + + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - - end # @muladd diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl index 4655a0b9ef6..d6e19b79886 100644 --- a/src/callbacks_step/amr.jl +++ b/src/callbacks_step/amr.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ AMRCallback(semi, controller [,adaptor=AdaptorAMR(semi)]; @@ -16,64 +16,67 @@ Performs adaptive mesh refinement (AMR) every `interval` time steps for a given semidiscretization `semi` using the chosen `controller`. """ struct AMRCallback{Controller, Adaptor, Cache} - controller::Controller - interval::Int - adapt_initial_condition::Bool - adapt_initial_condition_only_refine::Bool - dynamic_load_balancing::Bool - adaptor::Adaptor - amr_cache::Cache + controller::Controller + interval::Int + adapt_initial_condition::Bool + adapt_initial_condition_only_refine::Bool + dynamic_load_balancing::Bool + adaptor::Adaptor + amr_cache::Cache end - function AMRCallback(semi, controller, adaptor; interval, - adapt_initial_condition=true, - adapt_initial_condition_only_refine=true, - dynamic_load_balancing=true) - # check arguments - if !(interval isa Integer && interval >= 0) - throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) - end - - # AMR every `interval` time steps, but not after the final step - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. 
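# ---------------------------------------------------------------------------
# Illustrative aside (not part of the patch): why the interval conditions in
# these callbacks are based on `integrator.stats.naccept` rather than
# `integrator.iter`. With error-based step size control a step can be
# rejected; then `iter` (total steps) grows while `naccept` (accepted steps)
# does not, and the callback must not fire twice for one accepted step.
# A hedged sketch with hypothetical plain arguments instead of an integrator:
should_fire(naccept, iter, interval) =
    interval > 0 &&
    naccept % interval == 0 &&
    !(naccept == 0 && iter > 0)  # suppress firing while only rejected steps occurred

should_fire(10, 10, 5)  # true: every 5th accepted step
should_fire(10, 13, 5)  # true: three rejected steps do not change the decision
should_fire(0, 2, 5)    # false: nothing accepted yet beyond the initial state
# ---------------------------------------------------------------------------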
- if interval > 0 - condition = (u, t, integrator) -> ( (integrator.stats.naccept % interval == 0) && - !(integrator.stats.naccept == 0 && integrator.iter > 0) && - !isfinished(integrator) ) - else # disable the AMR callback except possibly for initial refinement during initialization - condition = (u, t, integrator) -> false - end - - to_refine = Int[] - to_coarsen = Int[] - amr_cache = (; to_refine, to_coarsen) - - amr_callback = AMRCallback{typeof(controller), typeof(adaptor), typeof(amr_cache)}( - controller, interval, adapt_initial_condition, adapt_initial_condition_only_refine, - dynamic_load_balancing, adaptor, amr_cache) - - DiscreteCallback(condition, amr_callback, - save_positions=(false,false), - initialize=initialize!) + adapt_initial_condition = true, + adapt_initial_condition_only_refine = true, + dynamic_load_balancing = true) + # check arguments + if !(interval isa Integer && interval >= 0) + throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) + end + + # AMR every `interval` time steps, but not after the final step + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + if interval > 0 + condition = (u, t, integrator) -> ((integrator.stats.naccept % interval == 0) && + !(integrator.stats.naccept == 0 && + integrator.iter > 0) && + !isfinished(integrator)) + else # disable the AMR callback except possibly for initial refinement during initialization + condition = (u, t, integrator) -> false + end + + to_refine = Int[] + to_coarsen = Int[] + amr_cache = (; to_refine, to_coarsen) + + amr_callback = AMRCallback{typeof(controller), typeof(adaptor), typeof(amr_cache)}(controller, + interval, + adapt_initial_condition, + adapt_initial_condition_only_refine, + dynamic_load_balancing, + adaptor, + amr_cache) + + DiscreteCallback(condition, amr_callback, + save_positions = (false, false), + initialize = initialize!) end function AMRCallback(semi, controller; kwargs...) - adaptor = AdaptorAMR(semi) - AMRCallback(semi, controller, adaptor; kwargs...) + adaptor = AdaptorAMR(semi) + AMRCallback(semi, controller, adaptor; kwargs...) end function AdaptorAMR(semi; kwargs...) - mesh, _, solver, _ = mesh_equations_solver_cache(semi) - AdaptorAMR(mesh, solver; kwargs...) + mesh, _, solver, _ = mesh_equations_solver_cache(semi) + AdaptorAMR(mesh, solver; kwargs...) end - # TODO: Taal bikeshedding, implement a method with less information and the signature # function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:AMRCallback}) # @nospecialize cb # reduce precompilation time @@ -81,27 +84,30 @@ end # amr_callback = cb.affect! # print(io, "AMRCallback") # end -function Base.show(io::IO, mime::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AMRCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - amr_callback = cb.affect! +function Base.show(io::IO, mime::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AMRCallback}) + @nospecialize cb # reduce precompilation time - summary_header(io, "AMRCallback") - summary_line(io, "controller", amr_callback.controller |> typeof |> nameof) - show(increment_indent(io), mime, amr_callback.controller) - summary_line(io, "interval", amr_callback.interval) - summary_line(io, "adapt IC", amr_callback.adapt_initial_condition ? 
"yes" : "no",) - if amr_callback.adapt_initial_condition - summary_line(io, "│ only refine", amr_callback.adapt_initial_condition_only_refine ? "yes" : "no") + if get(io, :compact, false) + show(io, cb) + else + amr_callback = cb.affect! + + summary_header(io, "AMRCallback") + summary_line(io, "controller", amr_callback.controller |> typeof |> nameof) + show(increment_indent(io), mime, amr_callback.controller) + summary_line(io, "interval", amr_callback.interval) + summary_line(io, "adapt IC", + amr_callback.adapt_initial_condition ? "yes" : "no") + if amr_callback.adapt_initial_condition + summary_line(io, "│ only refine", + amr_callback.adapt_initial_condition_only_refine ? "yes" : + "no") + end + summary_footer(io) end - summary_footer(io) - end end - # The function below is used to control the output depending on whether or not AMR is enabled. """ uses_amr(callback) @@ -110,37 +116,39 @@ Checks whether the provided callback or `CallbackSet` is an [`AMRCallback`](@ref or contains one. """ uses_amr(cb) = false -uses_amr(cb::DiscreteCallback{Condition,Affect!}) where {Condition, Affect!<:AMRCallback} = true +function uses_amr(cb::DiscreteCallback{Condition, Affect!}) where {Condition, + Affect! <: + AMRCallback} + true +end uses_amr(callbacks::CallbackSet) = mapreduce(uses_amr, |, callbacks.discrete_callbacks) - function get_element_variables!(element_variables, u, mesh, equations, solver, cache, amr_callback::AMRCallback; kwargs...) - get_element_variables!(element_variables, u, mesh, equations, solver, cache, - amr_callback.controller, amr_callback; kwargs...) + get_element_variables!(element_variables, u, mesh, equations, solver, cache, + amr_callback.controller, amr_callback; kwargs...) end - -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:AMRCallback} - amr_callback = cb.affect! - semi = integrator.p - - @trixi_timeit timer() "initial condition AMR" if amr_callback.adapt_initial_condition - # iterate until mesh does not change anymore - has_changed = amr_callback(integrator, - only_refine=amr_callback.adapt_initial_condition_only_refine) - while has_changed - compute_coefficients!(integrator.u, t, semi) - u_modified!(integrator, true) - has_changed = amr_callback(integrator, - only_refine=amr_callback.adapt_initial_condition_only_refine) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: AMRCallback} + amr_callback = cb.affect! + semi = integrator.p + + @trixi_timeit timer() "initial condition AMR" if amr_callback.adapt_initial_condition + # iterate until mesh does not change anymore + has_changed = amr_callback(integrator, + only_refine = amr_callback.adapt_initial_condition_only_refine) + while has_changed + compute_coefficients!(integrator.u, t, semi) + u_modified!(integrator, true) + has_changed = amr_callback(integrator, + only_refine = amr_callback.adapt_initial_condition_only_refine) + end end - end - return nothing + return nothing end - # TODO: Taal remove? # function (cb::DiscreteCallback{Condition,Affect!})(ode::ODEProblem) where {Condition, Affect!<:AMRCallback} # amr_callback = cb.affect! @@ -159,35 +167,31 @@ end # return nothing # end - function (amr_callback::AMRCallback)(integrator; kwargs...) - u_ode = integrator.u - semi = integrator.p - - @trixi_timeit timer() "AMR" begin - has_changed = amr_callback(u_ode, semi, - integrator.t, integrator.iter; kwargs...) 
- if has_changed - resize!(integrator, length(u_ode)) - u_modified!(integrator, true) + u_ode = integrator.u + semi = integrator.p + + @trixi_timeit timer() "AMR" begin + has_changed = amr_callback(u_ode, semi, + integrator.t, integrator.iter; kwargs...) + if has_changed + resize!(integrator, length(u_ode)) + u_modified!(integrator, true) + end end - end - return has_changed + return has_changed end - @inline function (amr_callback::AMRCallback)(u_ode::AbstractVector, semi::SemidiscretizationHyperbolic, t, iter; kwargs...) - # Note that we don't `wrap_array` the vector `u_ode` to be able to `resize!` - # it when doing AMR while still dispatching on the `mesh` etc. - amr_callback(u_ode, mesh_equations_solver_cache(semi)..., semi, t, iter; kwargs...) + # Note that we don't `wrap_array` the vector `u_ode` to be able to `resize!` + # it when doing AMR while still dispatching on the `mesh` etc. + amr_callback(u_ode, mesh_equations_solver_cache(semi)..., semi, t, iter; kwargs...) end - - # `passive_args` is currently used for Euler with self-gravity to adapt the gravity solver # passively without querying its indicator, based on the assumption that both solvers use # the same mesh. That's a hack and should be improved in the future once we have more examples @@ -197,292 +201,312 @@ end function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh, equations, dg::DG, cache, semi, t, iter; - only_refine=false, only_coarsen=false, - passive_args=()) - @unpack controller, adaptor = amr_callback - - u = wrap_array(u_ode, mesh, equations, dg, cache) - lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, - t=t, iter=iter) - - if mpi_isparallel() - # Collect lambda for all elements - lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache)) - # Use parent because n_elements_by_rank is an OffsetArray - recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank)) - MPI.Allgatherv!(lambda, recvbuf, mpi_comm()) - lambda = lambda_global - end - - leaf_cell_ids = leaf_cells(mesh.tree) - @boundscheck begin - @assert axes(lambda) == axes(leaf_cell_ids) ("Indicator (axes = $(axes(lambda))) and leaf cell (axes = $(axes(leaf_cell_ids))) arrays have different axes") - end - - @unpack to_refine, to_coarsen = amr_callback.amr_cache - empty!(to_refine) - empty!(to_coarsen) - for element in 1:length(lambda) - controller_value = lambda[element] - if controller_value > 0 - push!(to_refine, leaf_cell_ids[element]) - elseif controller_value < 0 - push!(to_coarsen, leaf_cell_ids[element]) + only_refine = false, only_coarsen = false, + passive_args = ()) + @unpack controller, adaptor = amr_callback + + u = wrap_array(u_ode, mesh, equations, dg, cache) + lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, + t = t, iter = iter) + + if mpi_isparallel() + # Collect lambda for all elements + lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache)) + # Use parent because n_elements_by_rank is an OffsetArray + recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank)) + MPI.Allgatherv!(lambda, recvbuf, mpi_comm()) + lambda = lambda_global end - end - - - @trixi_timeit timer() "refine" if !only_coarsen && !isempty(to_refine) - # refine mesh - refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh.tree, to_refine) - # Find all indices of elements whose cell ids are in refined_original_cells - elements_to_refine = findall(in(refined_original_cells), 
cache.elements.cell_ids) - - # refine solver - @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, cache, elements_to_refine) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, p_equations, p_dg, p_cache, elements_to_refine) + leaf_cell_ids = leaf_cells(mesh.tree) + @boundscheck begin + @assert axes(lambda)==axes(leaf_cell_ids) ("Indicator (axes = $(axes(lambda))) and leaf cell (axes = $(axes(leaf_cell_ids))) arrays have different axes") end - else - # If there is nothing to refine, create empty array for later use - refined_original_cells = Int[] - end - - @trixi_timeit timer() "coarsen" if !only_refine && !isempty(to_coarsen) - # Since the cells may have been shifted due to refinement, first we need to - # translate the old cell ids to the new cell ids - if !isempty(to_coarsen) - to_coarsen = original2refined(to_coarsen, refined_original_cells, mesh) + @unpack to_refine, to_coarsen = amr_callback.amr_cache + empty!(to_refine) + empty!(to_coarsen) + for element in 1:length(lambda) + controller_value = lambda[element] + if controller_value > 0 + push!(to_refine, leaf_cell_ids[element]) + elseif controller_value < 0 + push!(to_coarsen, leaf_cell_ids[element]) + end end - # Next, determine the parent cells from which the fine cells are to be - # removed, since these are needed for the coarsen! function. However, since - # we only want to coarsen if *all* child cells are marked for coarsening, - # we count the coarsening indicators for each parent cell and only coarsen - # if all children are marked as such (i.e., where the count is 2^ndims). At - # the same time, check if a cell is marked for coarsening even though it is - # *not* a leaf cell -> this can only happen if it was refined due to 2:1 - # smoothing during the preceding refinement operation. 
- parents_to_coarsen = zeros(Int, length(mesh.tree)) - for cell_id in to_coarsen - # If cell has no parent, it cannot be coarsened - if !has_parent(mesh.tree, cell_id) - continue - end - - # If cell is not leaf (anymore), it cannot be coarsened - if !is_leaf(mesh.tree, cell_id) - continue - end - - # Increase count for parent cell - parent_id = mesh.tree.parent_ids[cell_id] - parents_to_coarsen[parent_id] += 1 + @trixi_timeit timer() "refine" if !only_coarsen && !isempty(to_refine) + # refine mesh + refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh.tree, + to_refine) + + # Find all indices of elements whose cell ids are in refined_original_cells + elements_to_refine = findall(in(refined_original_cells), + cache.elements.cell_ids) + + # refine solver + @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, + cache, elements_to_refine) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, + p_equations, p_dg, p_cache, + elements_to_refine) + end + else + # If there is nothing to refine, create empty array for later use + refined_original_cells = Int[] end - # Extract only those parent cells for which all children should be coarsened - to_coarsen = collect(1:length(parents_to_coarsen))[parents_to_coarsen .== 2^ndims(mesh)] - - # Finally, coarsen mesh - coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh.tree, to_coarsen) + @trixi_timeit timer() "coarsen" if !only_refine && !isempty(to_coarsen) + # Since the cells may have been shifted due to refinement, first we need to + # translate the old cell ids to the new cell ids + if !isempty(to_coarsen) + to_coarsen = original2refined(to_coarsen, refined_original_cells, mesh) + end + + # Next, determine the parent cells from which the fine cells are to be + # removed, since these are needed for the coarsen! function. However, since + # we only want to coarsen if *all* child cells are marked for coarsening, + # we count the coarsening indicators for each parent cell and only coarsen + # if all children are marked as such (i.e., where the count is 2^ndims). At + # the same time, check if a cell is marked for coarsening even though it is + # *not* a leaf cell -> this can only happen if it was refined due to 2:1 + # smoothing during the preceding refinement operation. 
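# ---------------------------------------------------------------------------
# Illustrative aside (not part of the patch): the counting scheme described in
# the comments above, reduced to a toy example. A parent cell is coarsened
# only if all of its 2^ndims children are flagged. Hypothetical tree data for
# a 2D quadtree (4 children per parent):
ndims_toy = 2
parent_of = Dict(5 => 1, 6 => 1, 7 => 1, 8 => 1, 9 => 2)  # child id => parent id
flagged = [5, 6, 7, 8, 9]  # cells marked for coarsening

counts = Dict{Int, Int}()
for cell in flagged
    counts[parent_of[cell]] = get(counts, parent_of[cell], 0) + 1
end

# only parent 1 has all 2^ndims = 4 children flagged; parent 2 has just one
parents_to_coarsen_toy = sort!([p for (p, c) in counts if c == 2^ndims_toy])  # == [1]
# ---------------------------------------------------------------------------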
+ parents_to_coarsen = zeros(Int, length(mesh.tree)) + for cell_id in to_coarsen + # If cell has no parent, it cannot be coarsened + if !has_parent(mesh.tree, cell_id) + continue + end + + # If cell is not leaf (anymore), it cannot be coarsened + if !is_leaf(mesh.tree, cell_id) + continue + end + + # Increase count for parent cell + parent_id = mesh.tree.parent_ids[cell_id] + parents_to_coarsen[parent_id] += 1 + end + + # Extract only those parent cells for which all children should be coarsened + to_coarsen = collect(1:length(parents_to_coarsen))[parents_to_coarsen .== 2^ndims(mesh)] + + # Finally, coarsen mesh + coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh.tree, + to_coarsen) + + # Convert coarsened parent cell ids to the list of child cell ids that have + # been removed, since this is the information that is expected by the solver + removed_child_cells = zeros(Int, + n_children_per_cell(mesh.tree) * + length(coarsened_original_cells)) + for (index, coarse_cell_id) in enumerate(coarsened_original_cells) + for child in 1:n_children_per_cell(mesh.tree) + removed_child_cells[n_children_per_cell(mesh.tree) * (index - 1) + child] = coarse_cell_id + + child + end + end + + # Find all indices of elements whose cell ids are in removed_child_cells + elements_to_remove = findall(in(removed_child_cells), cache.elements.cell_ids) + + # coarsen solver + @trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, + cache, elements_to_remove) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, + p_equations, p_dg, p_cache, + elements_to_remove) + end + else + # If there is nothing to coarsen, create empty array for later use + coarsened_original_cells = Int[] + end - # Convert coarsened parent cell ids to the list of child cell ids that have - # been removed, since this is the information that is expected by the solver - removed_child_cells = zeros(Int, n_children_per_cell(mesh.tree) * length(coarsened_original_cells)) - for (index, coarse_cell_id) in enumerate(coarsened_original_cells) - for child in 1:n_children_per_cell(mesh.tree) - removed_child_cells[n_children_per_cell(mesh.tree) * (index-1) + child] = coarse_cell_id + child - end + # Store whether there were any cells coarsened or refined + has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) + if has_changed # TODO: Taal decide, where shall we set this? 
+ # don't set it to has_changed since there can be changes from earlier calls + mesh.unsaved_changes = true end - # Find all indices of elements whose cell ids are in removed_child_cells - elements_to_remove = findall(in(removed_child_cells), cache.elements.cell_ids) + # Dynamically balance computational load by first repartitioning the mesh and then redistributing the cells/elements + if has_changed && mpi_isparallel() && amr_callback.dynamic_load_balancing + @trixi_timeit timer() "dynamic load balancing" begin + old_mpi_ranks_per_cell = copy(mesh.tree.mpi_ranks) - # coarsen solver - @trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, cache, elements_to_remove) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, p_equations, p_dg, p_cache, elements_to_remove) - end - else - # If there is nothing to coarsen, create empty array for later use - coarsened_original_cells = Int[] - end - - # Store whether there were any cells coarsened or refined - has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) - if has_changed # TODO: Taal decide, where shall we set this? - # don't set it to has_changed since there can be changes from earlier calls - mesh.unsaved_changes = true - end - - # Dynamically balance computational load by first repartitioning the mesh and then redistributing the cells/elements - if has_changed && mpi_isparallel() && amr_callback.dynamic_load_balancing - @trixi_timeit timer() "dynamic load balancing" begin - old_mpi_ranks_per_cell = copy(mesh.tree.mpi_ranks) - - partition!(mesh) - - rebalance_solver!(u_ode, mesh, equations, dg, cache, old_mpi_ranks_per_cell) + partition!(mesh) + + rebalance_solver!(u_ode, mesh, equations, dg, cache, old_mpi_ranks_per_cell) + end end - end - # Return true if there were any cells coarsened or refined, otherwise false - return has_changed + # Return true if there were any cells coarsened or refined, otherwise false + return has_changed end - # Copy controller values to quad user data storage, will be called below function copy_to_quad_iter_volume(info, user_data) - info_obj = unsafe_load(info) - - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid - - # Access user_data = lambda - user_data_ptr = Ptr{Int}(user_data) - # Load controller_value = lambda[quad_id + 1] - controller_value = unsafe_load(user_data_ptr, quad_id + 1) - - # Access quadrant's user data ([global quad ID, controller_value]) - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - # Save controller value to quadrant's user data. 
- unsafe_store!(quad_data_ptr, controller_value, 2) - - return nothing + info_obj = unsafe_load(info) + + # Load tree from global trees array, one-based indexing + tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset + # Global quad ID + quad_id = offset + info_obj.quadid + + # Access user_data = lambda + user_data_ptr = Ptr{Int}(user_data) + # Load controller_value = lambda[quad_id + 1] + controller_value = unsafe_load(user_data_ptr, quad_id + 1) + + # Access quadrant's user data ([global quad ID, controller_value]) + quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + # Save controller value to quadrant's user data. + unsafe_store!(quad_data_ptr, controller_value, 2) + + return nothing end # 2D -cfunction(::typeof(copy_to_quad_iter_volume), ::Val{2}) = @cfunction(copy_to_quad_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(copy_to_quad_iter_volume), ::Val{2}) + @cfunction(copy_to_quad_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(copy_to_quad_iter_volume), ::Val{3}) = @cfunction(copy_to_quad_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(copy_to_quad_iter_volume), ::Val{3}) + @cfunction(copy_to_quad_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::P4estMesh, equations, dg::DG, cache, semi, t, iter; - only_refine=false, only_coarsen=false, - passive_args=()) - @unpack controller, adaptor = amr_callback - - u = wrap_array(u_ode, mesh, equations, dg, cache) - lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, - t=t, iter=iter) - - @boundscheck begin - @assert axes(lambda) == (Base.OneTo(ncells(mesh)),) ( - "Indicator array (axes = $(axes(lambda))) and mesh cells (axes = $(Base.OneTo(ncells(mesh)))) have different axes" - ) - end - - # Copy controller value of each quad to the quad's user data storage - iter_volume_c = cfunction(copy_to_quad_iter_volume, Val(ndims(mesh))) - - # The pointer to lambda will be interpreted as Ptr{Int} above - @assert lambda isa Vector{Int} - iterate_p4est(mesh.p4est, lambda; iter_volume_c=iter_volume_c) - - @trixi_timeit timer() "refine" if !only_coarsen - # Refine mesh - refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh) - - # Refine solver - @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, cache, - refined_original_cells) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, p_equations, - p_dg, p_cache, refined_original_cells) + only_refine = false, only_coarsen = false, + passive_args = ()) + @unpack controller, adaptor = amr_callback + + u = wrap_array(u_ode, mesh, equations, dg, cache) + lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, + t = t, iter = iter) + + @boundscheck begin + @assert axes(lambda)==(Base.OneTo(ncells(mesh)),) ("Indicator array (axes = $(axes(lambda))) and mesh cells (axes = $(Base.OneTo(ncells(mesh)))) have different axes") end - else - # If there is nothing to refine, create empty array for later use - refined_original_cells = Int[] - end - - @trixi_timeit timer() "coarsen" if !only_refine - # Coarsen mesh - coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh) - - # coarsen solver - 
@trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, cache, - coarsened_original_cells) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, p_equations, - p_dg, p_cache, coarsened_original_cells) + + # Copy controller value of each quad to the quad's user data storage + iter_volume_c = cfunction(copy_to_quad_iter_volume, Val(ndims(mesh))) + + # The pointer to lambda will be interpreted as Ptr{Int} above + @assert lambda isa Vector{Int} + iterate_p4est(mesh.p4est, lambda; iter_volume_c = iter_volume_c) + + @trixi_timeit timer() "refine" if !only_coarsen + # Refine mesh + refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh) + + # Refine solver + @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, + cache, + refined_original_cells) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, + p_equations, + p_dg, p_cache, + refined_original_cells) + end + else + # If there is nothing to refine, create empty array for later use + refined_original_cells = Int[] end - else - # If there is nothing to coarsen, create empty array for later use - coarsened_original_cells = Int[] - end - - # Store whether there were any cells coarsened or refined and perform load balancing - has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) - # Check if mesh changed on other processes - if mpi_isparallel() - has_changed = MPI.Allreduce!(Ref(has_changed), |, mpi_comm())[] - end - - if has_changed # TODO: Taal decide, where shall we set this? - # don't set it to has_changed since there can be changes from earlier calls - mesh.unsaved_changes = true - - if mpi_isparallel() && amr_callback.dynamic_load_balancing - @trixi_timeit timer() "dynamic load balancing" begin - global_first_quadrant = unsafe_wrap(Array, unsafe_load(mesh.p4est).global_first_quadrant, mpi_nranks() + 1) - old_global_first_quadrant = copy(global_first_quadrant) - partition!(mesh) - rebalance_solver!(u_ode, mesh, equations, dg, cache, old_global_first_quadrant) - end + + @trixi_timeit timer() "coarsen" if !only_refine + # Coarsen mesh + coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh) + + # coarsen solver + @trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, + cache, + coarsened_original_cells) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, + p_equations, + p_dg, p_cache, + coarsened_original_cells) + end + else + # If there is nothing to coarsen, create empty array for later use + coarsened_original_cells = Int[] end - reinitialize_boundaries!(semi.boundary_conditions, cache) - end + # Store whether there were any cells coarsened or refined and perform load balancing + has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) + # Check if mesh changed on other processes + if mpi_isparallel() + has_changed = MPI.Allreduce!(Ref(has_changed), |, mpi_comm())[] + end - # Return true if there were any cells coarsened or refined, otherwise false - return has_changed + if has_changed # TODO: Taal decide, where shall we set this? 
+ # don't set it to has_changed since there can be changes from earlier calls + mesh.unsaved_changes = true + + if mpi_isparallel() && amr_callback.dynamic_load_balancing + @trixi_timeit timer() "dynamic load balancing" begin + global_first_quadrant = unsafe_wrap(Array, + unsafe_load(mesh.p4est).global_first_quadrant, + mpi_nranks() + 1) + old_global_first_quadrant = copy(global_first_quadrant) + partition!(mesh) + rebalance_solver!(u_ode, mesh, equations, dg, cache, + old_global_first_quadrant) + end + end + + reinitialize_boundaries!(semi.boundary_conditions, cache) + end + + # Return true if there were any cells coarsened or refined, otherwise false + return has_changed end -function reinitialize_boundaries!(boundary_conditions::UnstructuredSortedBoundaryTypes, cache) - # Reinitialize boundary types container because boundaries may have changed. - initialize!(boundary_conditions, cache) +function reinitialize_boundaries!(boundary_conditions::UnstructuredSortedBoundaryTypes, + cache) + # Reinitialize boundary types container because boundaries may have changed. + initialize!(boundary_conditions, cache) end function reinitialize_boundaries!(boundary_conditions, cache) - return boundary_conditions + return boundary_conditions end - # After refining cells, shift original cell ids to match new locations # Note: Assumes sorted lists of original and refined cell ids! # Note: `mesh` is only required to extract ndims function original2refined(original_cell_ids, refined_original_cells, mesh) - # Sanity check - @assert issorted(original_cell_ids) "`original_cell_ids` not sorted" - @assert issorted(refined_original_cells) "`refined_cell_ids` not sorted" - - # Create array with original cell ids (not yet shifted) - shifted_cell_ids = collect(1:original_cell_ids[end]) - - # Loop over refined original cells and apply shift for all following cells - for cell_id in refined_original_cells - # Only calculate shifts for cell ids that are relevant - if cell_id > length(shifted_cell_ids) - break + # Sanity check + @assert issorted(original_cell_ids) "`original_cell_ids` not sorted" + @assert issorted(refined_original_cells) "`refined_cell_ids` not sorted" + + # Create array with original cell ids (not yet shifted) + shifted_cell_ids = collect(1:original_cell_ids[end]) + + # Loop over refined original cells and apply shift for all following cells + for cell_id in refined_original_cells + # Only calculate shifts for cell ids that are relevant + if cell_id > length(shifted_cell_ids) + break + end + + # Shift all subsequent cells by 2^ndims ids + shifted_cell_ids[(cell_id + 1):end] .+= 2^ndims(mesh) end - # Shift all subsequent cells by 2^ndims ids - shifted_cell_ids[(cell_id + 1):end] .+= 2^ndims(mesh) - end - - # Convert original cell ids to their shifted values - return shifted_cell_ids[original_cell_ids] + # Convert original cell ids to their shifted values + return shifted_cell_ids[original_cell_ids] end - - """ ControllerThreeLevel(semi, indicator; base_level=1, med_level=base_level, med_threshold=0.0, @@ -494,161 +518,169 @@ An AMR controller based on three levels (in descending order of precedence): if `med_level < 0`, set the target level to the current level - set the target level to `base_level` otherwise """ -struct ControllerThreeLevel{RealT<:Real, Indicator, Cache} - base_level::Int - med_level ::Int - max_level ::Int - med_threshold::RealT - max_threshold::RealT - indicator::Indicator - cache::Cache +struct ControllerThreeLevel{RealT <: Real, Indicator, Cache} + base_level::Int + med_level::Int + 
max_level::Int + med_threshold::RealT + max_threshold::RealT + indicator::Indicator + cache::Cache end -function ControllerThreeLevel(semi, indicator; base_level=1, - med_level=base_level, med_threshold=0.0, - max_level=base_level, max_threshold=1.0) - med_threshold, max_threshold = promote(med_threshold, max_threshold) - cache = create_cache(ControllerThreeLevel, semi) - ControllerThreeLevel{typeof(max_threshold), typeof(indicator), typeof(cache)}( - base_level, med_level, max_level, med_threshold, max_threshold, indicator, cache) +function ControllerThreeLevel(semi, indicator; base_level = 1, + med_level = base_level, med_threshold = 0.0, + max_level = base_level, max_threshold = 1.0) + med_threshold, max_threshold = promote(med_threshold, max_threshold) + cache = create_cache(ControllerThreeLevel, semi) + ControllerThreeLevel{typeof(max_threshold), typeof(indicator), typeof(cache)}(base_level, + med_level, + max_level, + med_threshold, + max_threshold, + indicator, + cache) end -create_cache(indicator_type::Type{ControllerThreeLevel}, semi) = create_cache(indicator_type, mesh_equations_solver_cache(semi)...) - +function create_cache(indicator_type::Type{ControllerThreeLevel}, semi) + create_cache(indicator_type, mesh_equations_solver_cache(semi)...) +end function Base.show(io::IO, controller::ControllerThreeLevel) - @nospecialize controller # reduce precompilation time - - print(io, "ControllerThreeLevel(") - print(io, controller.indicator) - print(io, ", base_level=", controller.base_level) - print(io, ", med_level=", controller.med_level) - print(io, ", max_level=", controller.max_level) - print(io, ", med_threshold=", controller.med_threshold) - print(io, ", max_threshold=", controller.max_threshold) - print(io, ")") + @nospecialize controller # reduce precompilation time + + print(io, "ControllerThreeLevel(") + print(io, controller.indicator) + print(io, ", base_level=", controller.base_level) + print(io, ", med_level=", controller.med_level) + print(io, ", max_level=", controller.max_level) + print(io, ", med_threshold=", controller.med_threshold) + print(io, ", max_threshold=", controller.max_threshold) + print(io, ")") end function Base.show(io::IO, mime::MIME"text/plain", controller::ControllerThreeLevel) - @nospecialize controller # reduce precompilation time + @nospecialize controller # reduce precompilation time - if get(io, :compact, false) - show(io, controller) - else - summary_header(io, "ControllerThreeLevel") - summary_line(io, "indicator", controller.indicator |> typeof |> nameof) - show(increment_indent(io), mime, controller.indicator) - summary_line(io, "base_level", controller.base_level) - summary_line(io, "med_level", controller.med_level) - summary_line(io, "max_level", controller.max_level) - summary_line(io, "med_threshold", controller.med_threshold) - summary_line(io, "max_threshold", controller.max_threshold) - summary_footer(io) - end + if get(io, :compact, false) + show(io, controller) + else + summary_header(io, "ControllerThreeLevel") + summary_line(io, "indicator", controller.indicator |> typeof |> nameof) + show(increment_indent(io), mime, controller.indicator) + summary_line(io, "base_level", controller.base_level) + summary_line(io, "med_level", controller.med_level) + summary_line(io, "max_level", controller.max_level) + summary_line(io, "med_threshold", controller.med_threshold) + summary_line(io, "max_threshold", controller.max_threshold) + summary_footer(io) + end end - function get_element_variables!(element_variables, u, mesh, equations, solver, 
cache, - controller::ControllerThreeLevel, amr_callback::AMRCallback; + controller::ControllerThreeLevel, + amr_callback::AMRCallback; kwargs...) - # call the indicator to get up-to-date values for IO - controller.indicator(u, mesh, equations, solver, cache; kwargs...) - get_element_variables!(element_variables, controller.indicator, amr_callback) + # call the indicator to get up-to-date values for IO + controller.indicator(u, mesh, equations, solver, cache; kwargs...) + get_element_variables!(element_variables, controller.indicator, amr_callback) end -function get_element_variables!(element_variables, indicator::AbstractIndicator, ::AMRCallback) - element_variables[:indicator_amr] = indicator.cache.alpha - return nothing +function get_element_variables!(element_variables, indicator::AbstractIndicator, + ::AMRCallback) + element_variables[:indicator_amr] = indicator.cache.alpha + return nothing end - function current_element_levels(mesh::TreeMesh, solver, cache) - cell_ids = cache.elements.cell_ids[eachelement(solver, cache)] + cell_ids = cache.elements.cell_ids[eachelement(solver, cache)] - return mesh.tree.levels[cell_ids] + return mesh.tree.levels[cell_ids] end - function extract_levels_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_obj = unsafe_load(info) - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid - # Julia element ID - element_id = quad_id + 1 + # Load tree from global trees array, one-based indexing + tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset + # Global quad ID + quad_id = offset + info_obj.quadid + # Julia element ID + element_id = quad_id + 1 - current_level = unsafe_load(info_obj.quad.level) + current_level = unsafe_load(info_obj.quad.level) - # Unpack user_data = current_levels and save current element level - ptr = Ptr{Int}(user_data) - unsafe_store!(ptr, current_level, element_id) + # Unpack user_data = current_levels and save current element level + ptr = Ptr{Int}(user_data) + unsafe_store!(ptr, current_level, element_id) - return nothing + return nothing end # 2D -cfunction(::typeof(extract_levels_iter_volume), ::Val{2}) = @cfunction(extract_levels_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(extract_levels_iter_volume), ::Val{2}) + @cfunction(extract_levels_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(extract_levels_iter_volume), ::Val{3}) = @cfunction(extract_levels_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(extract_levels_iter_volume), ::Val{3}) + @cfunction(extract_levels_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end function current_element_levels(mesh::P4estMesh, solver, cache) - current_levels = Vector{Int}(undef, nelements(solver, cache)) + current_levels = Vector{Int}(undef, nelements(solver, cache)) - iter_volume_c = cfunction(extract_levels_iter_volume, Val(ndims(mesh))) - iterate_p4est(mesh.p4est, current_levels; iter_volume_c=iter_volume_c) + iter_volume_c = cfunction(extract_levels_iter_volume, Val(ndims(mesh))) + iterate_p4est(mesh.p4est, current_levels; iter_volume_c = iter_volume_c) - return current_levels + return current_levels end - # TODO: Taal refactor, 
merge the two loops of ControllerThreeLevel and IndicatorLöhner etc.? # But that would remove the simplest possibility to write that stuff to a file... # We could of course implement some additional logic and workarounds, but is it worth the effort? function (controller::ControllerThreeLevel)(u::AbstractArray{<:Any}, mesh, equations, dg::DG, cache; kwargs...) - - @unpack controller_value = controller.cache - resize!(controller_value, nelements(dg, cache)) - - alpha = controller.indicator(u, mesh, equations, dg, cache; kwargs...) - current_levels = current_element_levels(mesh, dg, cache) - - @threaded for element in eachelement(dg, cache) - current_level = current_levels[element] - - # set target level - target_level = current_level - if alpha[element] > controller.max_threshold - target_level = controller.max_level - elseif alpha[element] > controller.med_threshold - if controller.med_level > 0 - target_level = controller.med_level - # otherwise, target_level = current_level - # set med_level = -1 to implicitly use med_level = current_level - end - else - target_level = controller.base_level - end - - # compare target level with actual level to set controller - if current_level < target_level - controller_value[element] = 1 # refine! - elseif current_level > target_level - controller_value[element] = -1 # coarsen! - else - controller_value[element] = 0 # we're good + @unpack controller_value = controller.cache + resize!(controller_value, nelements(dg, cache)) + + alpha = controller.indicator(u, mesh, equations, dg, cache; kwargs...) + current_levels = current_element_levels(mesh, dg, cache) + + @threaded for element in eachelement(dg, cache) + current_level = current_levels[element] + + # set target level + target_level = current_level + if alpha[element] > controller.max_threshold + target_level = controller.max_level + elseif alpha[element] > controller.med_threshold + if controller.med_level > 0 + target_level = controller.med_level + # otherwise, target_level = current_level + # set med_level = -1 to implicitly use med_level = current_level + end + else + target_level = controller.base_level + end + + # compare target level with actual level to set controller + if current_level < target_level + controller_value[element] = 1 # refine! + elseif current_level > target_level + controller_value[element] = -1 # coarsen! + else + controller_value[element] = 0 # we're good + end end - end - return controller_value + return controller_value end - """ ControllerThreeLevelCombined(semi, indicator_primary, indicator_secondary; base_level=1, @@ -664,129 +696,139 @@ An AMR controller based on three levels (in descending order of precedence): If `indicator_secondary >= max_threshold_secondary`, set the target level to `max_level`. 
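For example, a hedged construction sketch (the `indicator_primary`/`indicator_secondary`
objects and all level/threshold values below are illustrative placeholders, not defaults):

    amr_controller = ControllerThreeLevelCombined(semi, indicator_primary, indicator_secondary;
                                                  base_level = 4,
                                                  med_level = 5, med_threshold = 0.1,
                                                  max_level = 6, max_threshold = 0.6,
                                                  max_threshold_secondary = 0.3)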
""" -struct ControllerThreeLevelCombined{RealT<:Real, IndicatorPrimary, IndicatorSecondary, Cache} - base_level::Int - med_level ::Int - max_level ::Int - med_threshold::RealT - max_threshold::RealT - max_threshold_secondary::RealT - indicator_primary::IndicatorPrimary - indicator_secondary::IndicatorSecondary - cache::Cache +struct ControllerThreeLevelCombined{RealT <: Real, IndicatorPrimary, IndicatorSecondary, + Cache} + base_level::Int + med_level::Int + max_level::Int + med_threshold::RealT + max_threshold::RealT + max_threshold_secondary::RealT + indicator_primary::IndicatorPrimary + indicator_secondary::IndicatorSecondary + cache::Cache end function ControllerThreeLevelCombined(semi, indicator_primary, indicator_secondary; - base_level=1, - med_level=base_level, med_threshold=0.0, - max_level=base_level, max_threshold=1.0, - max_threshold_secondary=1.0) - med_threshold, max_threshold, max_threshold_secondary = promote(med_threshold, max_threshold, max_threshold_secondary) - cache = create_cache(ControllerThreeLevelCombined, semi) - ControllerThreeLevelCombined{typeof(max_threshold), typeof(indicator_primary), typeof(indicator_secondary), typeof(cache)}( - base_level, med_level, max_level, med_threshold, max_threshold, - max_threshold_secondary, indicator_primary, indicator_secondary, cache) + base_level = 1, + med_level = base_level, med_threshold = 0.0, + max_level = base_level, max_threshold = 1.0, + max_threshold_secondary = 1.0) + med_threshold, max_threshold, max_threshold_secondary = promote(med_threshold, + max_threshold, + max_threshold_secondary) + cache = create_cache(ControllerThreeLevelCombined, semi) + ControllerThreeLevelCombined{typeof(max_threshold), typeof(indicator_primary), + typeof(indicator_secondary), typeof(cache)}(base_level, + med_level, + max_level, + med_threshold, + max_threshold, + max_threshold_secondary, + indicator_primary, + indicator_secondary, + cache) end -create_cache(indicator_type::Type{ControllerThreeLevelCombined}, semi) = create_cache(indicator_type, mesh_equations_solver_cache(semi)...) - +function create_cache(indicator_type::Type{ControllerThreeLevelCombined}, semi) + create_cache(indicator_type, mesh_equations_solver_cache(semi)...) 
+end function Base.show(io::IO, controller::ControllerThreeLevelCombined) - @nospecialize controller # reduce precompilation time - - print(io, "ControllerThreeLevelCombined(") - print(io, controller.indicator_primary) - print(io, ", ", controller.indicator_secondary) - print(io, ", base_level=", controller.base_level) - print(io, ", med_level=", controller.med_level) - print(io, ", max_level=", controller.max_level) - print(io, ", med_threshold=", controller.med_threshold) - print(io, ", max_threshold_secondary=", controller.max_threshold_secondary) - print(io, ")") -end - -function Base.show(io::IO, mime::MIME"text/plain", controller::ControllerThreeLevelCombined) - @nospecialize controller # reduce precompilation time - - if get(io, :compact, false) - show(io, controller) - else - summary_header(io, "ControllerThreeLevelCombined") - summary_line(io, "primary indicator", controller.indicator_primary |> typeof |> nameof) - show(increment_indent(io), mime, controller.indicator_primary) - summary_line(io, "secondary indicator", controller.indicator_secondary |> typeof |> nameof) - show(increment_indent(io), mime, controller.indicator_secondary) - summary_line(io, "base_level", controller.base_level) - summary_line(io, "med_level", controller.med_level) - summary_line(io, "max_level", controller.max_level) - summary_line(io, "med_threshold", controller.med_threshold) - summary_line(io, "max_threshold", controller.max_threshold) - summary_line(io, "max_threshold_secondary", controller.max_threshold_secondary) - summary_footer(io) - end + @nospecialize controller # reduce precompilation time + + print(io, "ControllerThreeLevelCombined(") + print(io, controller.indicator_primary) + print(io, ", ", controller.indicator_secondary) + print(io, ", base_level=", controller.base_level) + print(io, ", med_level=", controller.med_level) + print(io, ", max_level=", controller.max_level) + print(io, ", med_threshold=", controller.med_threshold) + print(io, ", max_threshold_secondary=", controller.max_threshold_secondary) + print(io, ")") end +function Base.show(io::IO, mime::MIME"text/plain", + controller::ControllerThreeLevelCombined) + @nospecialize controller # reduce precompilation time + + if get(io, :compact, false) + show(io, controller) + else + summary_header(io, "ControllerThreeLevelCombined") + summary_line(io, "primary indicator", + controller.indicator_primary |> typeof |> nameof) + show(increment_indent(io), mime, controller.indicator_primary) + summary_line(io, "secondary indicator", + controller.indicator_secondary |> typeof |> nameof) + show(increment_indent(io), mime, controller.indicator_secondary) + summary_line(io, "base_level", controller.base_level) + summary_line(io, "med_level", controller.med_level) + summary_line(io, "max_level", controller.max_level) + summary_line(io, "med_threshold", controller.med_threshold) + summary_line(io, "max_threshold", controller.max_threshold) + summary_line(io, "max_threshold_secondary", controller.max_threshold_secondary) + summary_footer(io) + end +end function get_element_variables!(element_variables, u, mesh, equations, solver, cache, - controller::ControllerThreeLevelCombined, amr_callback::AMRCallback; + controller::ControllerThreeLevelCombined, + amr_callback::AMRCallback; kwargs...) - # call the indicator to get up-to-date values for IO - controller.indicator_primary(u, mesh, equations, solver, cache; kwargs...) 
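# The `element_variables` dict filled here follows a simple contract: one value per
# element stored under a `Symbol` key, which the output routines can then write
# element-wise to solution files. A minimal standalone sketch of that contract
# (hypothetical element count, no Trixi calls):
element_variables = Dict{Symbol, Any}()
alpha = rand(16)                           # pretend there are 16 elements with indicator values
element_variables[:indicator_amr] = alpha  # same key as used by `get_element_variables!` above
@assert length(element_variables[:indicator_amr]) == 16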
- get_element_variables!(element_variables, controller.indicator_primary, amr_callback) + # call the indicator to get up-to-date values for IO + controller.indicator_primary(u, mesh, equations, solver, cache; kwargs...) + get_element_variables!(element_variables, controller.indicator_primary, + amr_callback) end - function (controller::ControllerThreeLevelCombined)(u::AbstractArray{<:Any}, mesh, equations, dg::DG, cache; kwargs...) - - @unpack controller_value = controller.cache - resize!(controller_value, nelements(dg, cache)) - - alpha = controller.indicator_primary(u, mesh, equations, dg, cache; kwargs...) - alpha_secondary = controller.indicator_secondary(u, mesh, equations, dg, cache) - - current_levels = current_element_levels(mesh, dg, cache) - - @threaded for element in eachelement(dg, cache) - current_level = current_levels[element] - - # set target level - target_level = current_level - if alpha[element] > controller.max_threshold - target_level = controller.max_level - elseif alpha[element] > controller.med_threshold - if controller.med_level > 0 - target_level = controller.med_level - # otherwise, target_level = current_level - # set med_level = -1 to implicitly use med_level = current_level - end - else - target_level = controller.base_level + @unpack controller_value = controller.cache + resize!(controller_value, nelements(dg, cache)) + + alpha = controller.indicator_primary(u, mesh, equations, dg, cache; kwargs...) + alpha_secondary = controller.indicator_secondary(u, mesh, equations, dg, cache) + + current_levels = current_element_levels(mesh, dg, cache) + + @threaded for element in eachelement(dg, cache) + current_level = current_levels[element] + + # set target level + target_level = current_level + if alpha[element] > controller.max_threshold + target_level = controller.max_level + elseif alpha[element] > controller.med_threshold + if controller.med_level > 0 + target_level = controller.med_level + # otherwise, target_level = current_level + # set med_level = -1 to implicitly use med_level = current_level + end + else + target_level = controller.base_level + end + + if alpha_secondary[element] >= controller.max_threshold_secondary + target_level = controller.max_level + end + + # compare target level with actual level to set controller + if current_level < target_level + controller_value[element] = 1 # refine! + elseif current_level > target_level + controller_value[element] = -1 # coarsen! + else + controller_value[element] = 0 # we're good + end end - if alpha_secondary[element] >= controller.max_threshold_secondary - target_level = controller.max_level - end - - # compare target level with actual level to set controller - if current_level < target_level - controller_value[element] = 1 # refine! - elseif current_level > target_level - controller_value[element] = -1 # coarsen! - else - controller_value[element] = 0 # we're good - end - end - - return controller_value + return controller_value end - include("amr_dg.jl") include("amr_dg1d.jl") include("amr_dg2d.jl") include("amr_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/amr_dg.jl b/src/callbacks_step/amr_dg.jl index 239b83cb562..19bbebd9254 100644 --- a/src/callbacks_step/amr_dg.jl +++ b/src/callbacks_step/amr_dg.jl @@ -3,75 +3,89 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Redistribute data for load balancing after partitioning the mesh function rebalance_solver!(u_ode::AbstractVector, mesh::ParallelP4estMesh, equations, dg::DGSEM, cache, old_global_first_quadrant) - # mpi ranks are 0-based, this array uses 1-based indices - global_first_quadrant = unsafe_wrap(Array, unsafe_load(mesh.p4est).global_first_quadrant, mpi_nranks() + 1) - if global_first_quadrant[mpi_rank()+1] == old_global_first_quadrant[mpi_rank()+1] && - global_first_quadrant[mpi_rank()+2] == old_global_first_quadrant[mpi_rank()+2] - # Global ids of first and last local quadrants are the same for newly partitioned mesh so the - # solver does not need to be rebalanced on this rank. - # Container init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there are other MPI ranks that need to be rebalanced if this function is called) - reinitialize_containers!(mesh, equations, dg, cache) - return - end - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact - # nicely with non-base array types - old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) - - @trixi_timeit timer() "reinitialize data structures" reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) + # mpi ranks are 0-based, this array uses 1-based indices + global_first_quadrant = unsafe_wrap(Array, + unsafe_load(mesh.p4est).global_first_quadrant, + mpi_nranks() + 1) + if global_first_quadrant[mpi_rank() + 1] == + old_global_first_quadrant[mpi_rank() + 1] && + global_first_quadrant[mpi_rank() + 2] == + old_global_first_quadrant[mpi_rank() + 2] + # Global ids of first and last local quadrants are the same for newly partitioned mesh so the + # solver does not need to be rebalanced on this rank. + # Container init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there are other MPI ranks that need to be rebalanced if this function is called) + reinitialize_containers!(mesh, equations, dg, cache) + return + end + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact + # nicely with non-base array types + old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) - @trixi_timeit timer() "exchange data" begin - # Collect MPI requests for MPI_Waitall - requests = Vector{MPI.Request}() - # Find elements that will change their rank and send their data to the new rank - for old_element_id in 1:old_n_elements - # Get global quad ID of old element; local quad id is element id - 1 - global_quad_id = old_global_first_quadrant[mpi_rank()+1] + old_element_id - 1 - if !(global_first_quadrant[mpi_rank()+1] <= global_quad_id < global_first_quadrant[mpi_rank()+2]) - # Send element data to new rank, use global_quad_id as tag (non-blocking) - dest = findfirst(r -> global_first_quadrant[r] <= global_quad_id < global_first_quadrant[r+1], - 1:mpi_nranks()) - 1 # mpi ranks 0-based - request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, global_quad_id, mpi_comm()) - push!(requests, request) + @trixi_timeit timer() "reinitialize data structures" begin + reinitialize_containers!(mesh, equations, dg, cache) end - end - # Loop over all elements in new container and either copy them from old container - # or receive them with MPI - for element in eachelement(dg, cache) - # Get global quad ID of element; local quad id is element id - 1 - global_quad_id = global_first_quadrant[mpi_rank()+1] + element - 1 - if old_global_first_quadrant[mpi_rank()+1] <= global_quad_id < old_global_first_quadrant[mpi_rank()+2] - # Quad ids are 0-based, element ids are 1-based, hence add 1 - old_element_id = global_quad_id - old_global_first_quadrant[mpi_rank()+1] + 1 - # Copy old element data to new element container - @views u[:, .., element] .= old_u[:, .., old_element_id] - else - # Receive old element data - src = findfirst(r -> old_global_first_quadrant[r] <= global_quad_id < old_global_first_quadrant[r+1], - 1:mpi_nranks()) - 1 # mpi ranks 0-based - request = MPI.Irecv!(@view(u[:, .., element]), src, global_quad_id, mpi_comm()) - push!(requests, request) - end - end + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) - # Wait for all non-blocking MPI send/receive operations to finish - MPI.Waitall(requests, MPI.Status) - end - end # GC.@preserve old_u_ode -end + @trixi_timeit timer() "exchange data" begin + # Collect MPI requests for MPI_Waitall + requests = Vector{MPI.Request}() + # Find elements that will change their rank and send their data to the new rank + for old_element_id in 1:old_n_elements + # Get global quad ID of old element; local quad id is element id - 1 + global_quad_id = old_global_first_quadrant[mpi_rank() + 1] + + old_element_id - 1 + if !(global_first_quadrant[mpi_rank() + 1] <= global_quad_id < + global_first_quadrant[mpi_rank() + 2]) + # Send element data to new rank, use global_quad_id as tag (non-blocking) + dest = findfirst(r -> global_first_quadrant[r] <= global_quad_id < + global_first_quadrant[r + 1], + 1:mpi_nranks()) - 1 # mpi ranks 0-based + request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, + global_quad_id, mpi_comm()) + push!(requests, request) + end + end + # Loop over all elements in new container and either copy them from old container + # or receive them with MPI + for element in eachelement(dg, cache) + # Get global quad ID of element; local quad id is element id - 1 + global_quad_id = 
global_first_quadrant[mpi_rank() + 1] + element - 1 + if old_global_first_quadrant[mpi_rank() + 1] <= global_quad_id < + old_global_first_quadrant[mpi_rank() + 2] + # Quad ids are 0-based, element ids are 1-based, hence add 1 + old_element_id = global_quad_id - + old_global_first_quadrant[mpi_rank() + 1] + 1 + # Copy old element data to new element container + @views u[:, .., element] .= old_u[:, .., old_element_id] + else + # Receive old element data + src = findfirst(r -> old_global_first_quadrant[r] <= + global_quad_id < + old_global_first_quadrant[r + 1], + 1:mpi_nranks()) - 1 # mpi ranks 0-based + request = MPI.Irecv!(@view(u[:, .., element]), src, global_quad_id, + mpi_comm()) + push!(requests, request) + end + end -end # @muladd \ No newline at end of file + # Wait for all non-blocking MPI send/receive operations to finish + MPI.Waitall(requests, MPI.Status) + end + end # GC.@preserve old_u_ode +end +end # @muladd diff --git a/src/callbacks_step/amr_dg1d.jl b/src/callbacks_step/amr_dg1d.jl index b16b349189c..e31a74730ea 100644 --- a/src/callbacks_step/amr_dg1d.jl +++ b/src/callbacks_step/amr_dg1d.jl @@ -3,255 +3,255 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Refine elements in the DG solver based on a list of cell_ids that should be refined function refine!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, equations, dg::DGSEM, cache, elements_to_refine) - # Return early if there is nothing to do - if isempty(elements_to_refine) - return - end - - # Determine for each existing element whether it needs to be refined - needs_refinement = falses(nelements(dg, cache)) - needs_refinement[elements_to_refine] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - # re-initialize elements container - @unpack elements = cache - resize!(elements, length(leaf_cell_ids)) - init_elements!(elements, leaf_cell_ids, mesh, dg.basis) - @assert nelements(dg, cache) > old_n_elements - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or refine them - element_id = 1 - for old_element_id in 1:old_n_elements - if needs_refinement[old_element_id] - # Refine element and store solution directly in new data structure - refine_element!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 2^ndims(mesh) - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_refine) + return + end + + # Determine for each existing element whether it needs to be refined + needs_refinement = falses(nelements(dg, cache)) + needs_refinement[elements_to_refine] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + # re-initialize elements container + @unpack elements = cache + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + @assert nelements(dg, cache) > old_n_elements + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or refine them + element_id = 1 + for old_element_id in 1:old_n_elements + if needs_refinement[old_element_id] + # Refine element and store solution directly in new data structure + refine_element!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 2^ndims(mesh) + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + # Depending on whether the last element processed above had to be refined or not, + # the counter `element_id` can have two different values at the end. + @assert element_id == + nelements(dg, cache) + + 1||element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # re-initialize interfaces container + @unpack interfaces = cache + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # Sanity check + if isperiodic(mesh.tree) + @assert ninterfaces(interfaces)==1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") end - # If everything is correct, we should have processed all elements. - # Depending on whether the last element processed above had to be refined or not, - # the counter `element_id` can have two different values at the end. 
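# A standalone sketch of the `element_id` bookkeeping checked above (hypothetical
# refinement flags, not Trixi code): each refined element contributes 2^ndims
# children and each kept element one copy, so in this simple serial setting the
# counter ends exactly one past the new number of elements.
let ndims_mesh = 1, element_id = 1
    needs_refinement = [false, true, false, true]  # assume elements 2 and 4 get refined
    for flag in needs_refinement
        element_id += flag ? 2^ndims_mesh : 1      # mirrors the loop increments above
    end
    n_new = count(!, needs_refinement) + count(needs_refinement) * 2^ndims_mesh
    @assert element_id == n_new + 1                # 4 old elements -> 6 new, counter at 7
end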
- @assert element_id == nelements(dg, cache) + 1 || element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # re-initialize interfaces container - @unpack interfaces = cache - resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) - init_interfaces!(interfaces, elements, mesh) - - # re-initialize boundaries container - @unpack boundaries = cache - resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) - init_boundaries!(boundaries, elements, mesh) - - # Sanity check - if isperiodic(mesh.tree) - @assert ninterfaces(interfaces) == 1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") - end - - return nothing -end + return nothing +end # TODO: Taal compare performance of different implementations # Refine solution data u for an element, using L2 projection (interpolation) -function refine_element!(u::AbstractArray{<:Any,3}, element_id, +function refine_element!(u::AbstractArray{<:Any, 3}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack forward_upper, forward_lower = adaptor - - # Store new element ids - left_id = element_id - right_id = element_id + 1 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) >= old_element_id - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) >= element_id + 1 - end - - # Interpolate to left element - for i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, element_id)) - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, old_element_id) * forward_lower[i, k] - end - set_node_vars!(u, acc, equations, dg, i, left_id) - end - - # Interpolate to right element - for i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, element_id)) - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, old_element_id) * forward_upper[i, k] + @unpack forward_upper, forward_lower = adaptor + + # Store new element ids + left_id = element_id + right_id = element_id + 1 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) >= old_element_id + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) >= element_id + 1 end - set_node_vars!(u, acc, equations, dg, i, right_id) - end - return nothing -end + # Interpolate to left element + for i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, element_id)) + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, old_element_id) * + forward_lower[i, k] + end + set_node_vars!(u, acc, equations, dg, i, left_id) + end + # Interpolate to right element + for i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, element_id)) + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, old_element_id) * + forward_upper[i, k] + end + set_node_vars!(u, acc, equations, dg, i, right_id) + end + return nothing +end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed function coarsen!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, equations, dg::DGSEM, cache, elements_to_remove) - # Return early 
if there is nothing to do - if isempty(elements_to_remove) - return - end - - # Determine for each old element whether it needs to be removed - to_be_removed = falses(nelements(dg, cache)) - to_be_removed[elements_to_remove] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - # re-initialize elements container - @unpack elements = cache - resize!(elements, length(leaf_cell_ids)) - init_elements!(elements, leaf_cell_ids, mesh, dg.basis) - @assert nelements(dg, cache) < old_n_elements - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or coarsen them - skip = 0 - element_id = 1 - for old_element_id in 1:old_n_elements - # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements - if skip > 0 - skip -= 1 - continue - end - - if to_be_removed[old_element_id] - # If an element is to be removed, sanity check if the following elements - # are also marked - otherwise there would be an error in the way the - # cells/elements are sorted - @assert all(to_be_removed[old_element_id:(old_element_id+2^ndims(mesh)-1)]) "bad cell/element order" - - # Coarsen elements and store solution directly in new data structure - coarsen_elements!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 1 - skip = 2^ndims(mesh) - 1 - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_remove) + return end - # If everything is correct, we should have processed all elements. - @assert element_id == nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # re-initialize interfaces container - @unpack interfaces = cache - resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) - init_interfaces!(interfaces, elements, mesh) - - # re-initialize boundaries container - @unpack boundaries = cache - resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) - init_boundaries!(boundaries, elements, mesh) - - # Sanity check - if isperiodic(mesh.tree) - @assert ninterfaces(interfaces) == 1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") - end - - return nothing -end + # Determine for each old element whether it needs to be removed + to_be_removed = falses(nelements(dg, cache)) + to_be_removed[elements_to_remove] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + # re-initialize elements container + @unpack elements = cache + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + @assert nelements(dg, cache) < old_n_elements + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or coarsen them + skip = 0 + element_id = 1 + for old_element_id in 1:old_n_elements + # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements + if skip > 0 + skip -= 1 + continue + end + + if to_be_removed[old_element_id] + # If an element is to be removed, sanity check if the following elements + # are also marked - otherwise there would be an error in the way the + # cells/elements are sorted + @assert all(to_be_removed[old_element_id:(old_element_id + 2^ndims(mesh) - 1)]) "bad cell/element order" + + # Coarsen elements and store solution directly in new data structure + coarsen_elements!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 1 + skip = 2^ndims(mesh) - 1 + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + @assert element_id==nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # re-initialize interfaces container + @unpack interfaces = cache + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # Sanity check + if isperiodic(mesh.tree) + @assert ninterfaces(interfaces)==1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") + end + + return nothing +end # TODO: Taal compare performance of different implementations # Coarsen solution data u for two elements, using L2 projection -function coarsen_elements!(u::AbstractArray{<:Any,3}, element_id, +function coarsen_elements!(u::AbstractArray{<:Any, 3}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack reverse_upper, reverse_lower = adaptor - - # Store old element ids - left_id = old_element_id - right_id = old_element_id + 1 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) >= old_element_id + 1 - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) >= element_id - end - - for i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, element_id)) - - # Project from lower left element - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, left_id) * reverse_lower[i, k] - end - - # Project from lower right element - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, right_id) * reverse_upper[i, k] + @unpack 
reverse_upper, reverse_lower = adaptor + + # Store old element ids + left_id = old_element_id + right_id = old_element_id + 1 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) >= old_element_id + 1 + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) >= element_id end - # Update value - set_node_vars!(u, acc, equations, dg, i, element_id) - end -end + for i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, element_id)) + # Project from lower left element + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, left_id) * reverse_lower[i, k] + end -# this method is called when an `ControllerThreeLevel` is constructed -function create_cache(::Type{ControllerThreeLevel}, mesh::TreeMesh{1}, equations, dg::DG, cache) + # Project from lower right element + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, right_id) * + reverse_upper[i, k] + end - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) + # Update value + set_node_vars!(u, acc, equations, dg, i, element_id) + end end -function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{1}, equations, dg::DG, cache) - - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) +# this method is called when an `ControllerThreeLevel` is constructed +function create_cache(::Type{ControllerThreeLevel}, mesh::TreeMesh{1}, equations, + dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) end - +function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{1}, + equations, dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) +end end # @muladd diff --git a/src/callbacks_step/amr_dg2d.jl b/src/callbacks_step/amr_dg2d.jl index 9f677d1dc4d..400d16347d5 100644 --- a/src/callbacks_step/amr_dg2d.jl +++ b/src/callbacks_step/amr_dg2d.jl @@ -3,334 +3,346 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Redistribute data for load balancing after partitioning the mesh function rebalance_solver!(u_ode::AbstractVector, mesh::TreeMesh{2}, equations, dg::DGSEM, cache, old_mpi_ranks_per_cell) - if cache.elements.cell_ids == local_leaf_cells(mesh.tree) - # Cell ids of the current elements are the same as the local leaf cells of the - # newly partitioned mesh, so the solver doesn't need to be rebalanced on this rank. - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there are other MPI ranks that need to be rebalanced if this function is called) - reinitialize_containers!(mesh, equations, dg, cache) - return - end - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_cell_ids = copy(cache.elements.cell_ids) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed - # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact - # nicely with non-base array types - old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) - - @trixi_timeit timer() "reinitialize data structures" reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - @trixi_timeit timer() "exchange data" begin - # Collect MPI requests for MPI_Waitall - requests = Vector{MPI.Request}() - - # Find elements that will change their rank and send their data to the new rank - for old_element_id in 1:old_n_elements - cell_id = old_cell_ids[old_element_id] - if !(cell_id in leaf_cell_ids) - # Send element data to new rank, use cell_id as tag (non-blocking) - dest = mesh.tree.mpi_ranks[cell_id] - request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, cell_id, mpi_comm()) - push!(requests, request) - end - end - - # Loop over all elements in new container and either copy them from old container - # or receive them with MPI - for element in eachelement(dg, cache) - cell_id = cache.elements.cell_ids[element] - if cell_id in old_cell_ids - old_element_id = searchsortedfirst(old_cell_ids, cell_id) - # Copy old element data to new element container - @views u[:, .., element] .= old_u[:, .., old_element_id] - else - # Receive old element data - src = old_mpi_ranks_per_cell[cell_id] - request = MPI.Irecv!(@view(u[:, .., element]), src, cell_id, mpi_comm()) - push!(requests, request) + if cache.elements.cell_ids == local_leaf_cells(mesh.tree) + # Cell ids of the current elements are the same as the local leaf cells of the + # newly partitioned mesh, so the solver doesn't need to be rebalanced on this rank. + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there are other MPI ranks that need to be rebalanced if this function is called) + reinitialize_containers!(mesh, equations, dg, cache) + return + end + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_cell_ids = copy(cache.elements.cell_ids) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact + # nicely with non-base array types + old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) + + @trixi_timeit timer() "reinitialize data structures" begin + reinitialize_containers!(mesh, equations, dg, cache) end - end - # Wait for all non-blocking MPI send/receive operations to finish - MPI.Waitall(requests, MPI.Status) - end - end # GC.@preserve old_u_ode + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + @trixi_timeit timer() "exchange data" begin + # Collect MPI requests for MPI_Waitall + requests = Vector{MPI.Request}() + + # Find elements that will change their rank and send their data to the new rank + for old_element_id in 1:old_n_elements + cell_id = old_cell_ids[old_element_id] + if !(cell_id in leaf_cell_ids) + # Send element data to new rank, use cell_id as tag (non-blocking) + dest = mesh.tree.mpi_ranks[cell_id] + request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, + cell_id, mpi_comm()) + push!(requests, request) + end + end + + # Loop over all elements in new container and either copy them from old container + # or receive them with MPI + for element in eachelement(dg, cache) + cell_id = cache.elements.cell_ids[element] + if cell_id in old_cell_ids + old_element_id = searchsortedfirst(old_cell_ids, cell_id) + # Copy old element data to new element container + @views u[:, .., element] .= old_u[:, .., old_element_id] + else + # Receive old element data + src = old_mpi_ranks_per_cell[cell_id] + request = MPI.Irecv!(@view(u[:, .., element]), src, cell_id, + mpi_comm()) + push!(requests, request) + end + end + + # Wait for all non-blocking MPI send/receive operations to finish + MPI.Waitall(requests, MPI.Status) + end + end # GC.@preserve old_u_ode end - # Refine elements in the DG solver based on a list of cell_ids that should be refined function refine!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DGSEM, cache, elements_to_refine) - # Return early if there is nothing to do - if isempty(elements_to_refine) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have refined elements) - reinitialize_containers!(mesh, equations, dg, cache) + # Return early if there is nothing to do + if isempty(elements_to_refine) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have refined elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - return - end - - # Determine for each existing element whether it needs to be refined - needs_refinement = falses(nelements(dg, cache)) - needs_refinement[elements_to_refine] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or refine them - element_id = 1 - for old_element_id in 1:old_n_elements - if needs_refinement[old_element_id] - # Refine element and store solution directly in new data structure - refine_element!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 2^ndims(mesh) - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + + # Determine for each existing element whether it needs to be refined + needs_refinement = falses(nelements(dg, cache)) + needs_refinement[elements_to_refine] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or refine them + element_id = 1 + for old_element_id in 1:old_n_elements + if needs_refinement[old_element_id] + # Refine element and store solution directly in new data structure + refine_element!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 2^ndims(mesh) + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + # Depending on whether the last element processed above had to be refined or not, + # the counter `element_id` can have two different values at the end. + @assert element_id == + nelements(dg, cache) + + 1||element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # Sanity check + if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && + !mpi_isparallel() + @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") end - # If everything is correct, we should have processed all elements. - # Depending on whether the last element processed above had to be refined or not, - # the counter `element_id` can have two different values at the end. 
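# A generic illustration of the hazard the recurring `GC.@preserve old_u_ode` guards
# against (plain Julia, independent of Trixi's `wrap_array`): a wrapper created from
# a raw pointer via `unsafe_wrap` holds no reference to its backing array, so the
# backing array must be kept alive explicitly while the wrapper is in use.
buf = collect(1.0:4.0)
GC.@preserve buf begin
    v = unsafe_wrap(Array, pointer(buf), length(buf))  # shares memory, holds no reference to buf
    @assert v[2] == buf[2]
end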
- @assert element_id == nelements(dg, cache) + 1 || element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && !mpi_isparallel() - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end - - return nothing -end + return nothing +end # TODO: Taal compare performance of different implementations # Refine solution data u for an element, using L2 projection (interpolation) -function refine_element!(u::AbstractArray{<:Any,4}, element_id, +function refine_element!(u::AbstractArray{<:Any, 4}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack forward_upper, forward_lower = adaptor - - # Store new element ids - lower_left_id = element_id - lower_right_id = element_id + 1 - upper_left_id = element_id + 2 - upper_right_id = element_id + 3 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) == nnodes(dg) - @assert size(old_u, 4) >= old_element_id - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) == nnodes(dg) - @assert size( u, 4) >= element_id + 3 - end - - # Interpolate to lower left element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_lower[i, k] * forward_lower[j, l] - end - set_node_vars!(u, acc, equations, dg, i, j, lower_left_id) - end - - # Interpolate to lower right element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_upper[i, k] * forward_lower[j, l] + @unpack forward_upper, forward_lower = adaptor + + # Store new element ids + lower_left_id = element_id + lower_right_id = element_id + 1 + upper_left_id = element_id + 2 + upper_right_id = element_id + 3 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) == nnodes(dg) + @assert size(old_u, 4) >= old_element_id + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) == nnodes(dg) + @assert size(u, 4) >= element_id + 3 end - set_node_vars!(u, acc, equations, dg, i, j, lower_right_id) - end - - # Interpolate to upper left element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_lower[i, k] * forward_upper[j, l] + + # Interpolate to lower left element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_lower[i, k] * forward_lower[j, l] + end + 
set_node_vars!(u, acc, equations, dg, i, j, lower_left_id) end - set_node_vars!(u, acc, equations, dg, i, j, upper_left_id) - end - - # Interpolate to upper right element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_upper[i, k] * forward_upper[j, l] + + # Interpolate to lower right element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_upper[i, k] * forward_lower[j, l] + end + set_node_vars!(u, acc, equations, dg, i, j, lower_right_id) end - set_node_vars!(u, acc, equations, dg, i, j, upper_right_id) - end - return nothing -end + # Interpolate to upper left element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_lower[i, k] * forward_upper[j, l] + end + set_node_vars!(u, acc, equations, dg, i, j, upper_left_id) + end + # Interpolate to upper right element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_upper[i, k] * forward_upper[j, l] + end + set_node_vars!(u, acc, equations, dg, i, j, upper_right_id) + end + return nothing +end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function coarsen!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{2}, P4estMesh{2}}, +function coarsen!(u_ode::AbstractVector, adaptor, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DGSEM, cache, elements_to_remove) - # Return early if there is nothing to do - if isempty(elements_to_remove) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have coarsened elements) - reinitialize_containers!(mesh, equations, dg, cache) - end - return - end - - # Determine for each old element whether it needs to be removed - to_be_removed = falses(nelements(dg, cache)) - to_be_removed[elements_to_remove] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or coarsen them - skip = 0 - element_id = 1 - for old_element_id in 1:old_n_elements - # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements - if skip > 0 - skip -= 1 - continue - end - - if to_be_removed[old_element_id] - # If an element is to be removed, sanity check if the following elements - # are also marked - otherwise there would be an error in the way the - # cells/elements are sorted - @assert all(to_be_removed[old_element_id:(old_element_id+2^ndims(mesh)-1)]) "bad cell/element order" - - # Coarsen elements and store solution directly in new data structure - coarsen_elements!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 1 - skip = 2^ndims(mesh) - 1 - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_remove) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have coarsened elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - # If everything is correct, we should have processed all elements. - @assert element_id == nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && !mpi_isparallel() - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end + # Determine for each old element whether it needs to be removed + to_be_removed = falses(nelements(dg, cache)) + to_be_removed[elements_to_remove] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or coarsen them + skip = 0 + element_id = 1 + for old_element_id in 1:old_n_elements + # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements + if skip > 0 + skip -= 1 + continue + end + + if to_be_removed[old_element_id] + # If an element is to be removed, sanity check if the following elements + # are also marked - otherwise there would be an error in the way the + # cells/elements are sorted + @assert all(to_be_removed[old_element_id:(old_element_id + 2^ndims(mesh) - 1)]) "bad cell/element order" + + # Coarsen elements and store solution directly in new data structure + coarsen_elements!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 1 + skip = 2^ndims(mesh) - 1 + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + @assert element_id==nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # Sanity check + if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && + !mpi_isparallel() + @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") + end - return nothing + return nothing end - # TODO: Taal compare performance of different implementations # Coarsen solution data u for four elements, using L2 projection -function coarsen_elements!(u::AbstractArray{<:Any,4}, element_id, +function coarsen_elements!(u::AbstractArray{<:Any, 4}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack reverse_upper, reverse_lower = adaptor - - # Store old element ids - lower_left_id = old_element_id - lower_right_id = old_element_id + 1 - upper_left_id = old_element_id + 2 - upper_right_id = old_element_id + 3 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) == nnodes(dg) - @assert size(old_u, 4) >= old_element_id + 3 - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) == nnodes(dg) - @assert size( u, 4) >= element_id - end - - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - - # Project from lower left element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, lower_left_id) * reverse_lower[i, k] * reverse_lower[j, l] + @unpack reverse_upper, reverse_lower = adaptor + + # Store old element ids + lower_left_id = old_element_id + lower_right_id = old_element_id + 1 + upper_left_id = old_element_id + 2 + upper_right_id = old_element_id + 3 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 
3) == nnodes(dg) + @assert size(old_u, 4) >= old_element_id + 3 + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) == nnodes(dg) + @assert size(u, 4) >= element_id end - # Project from lower right element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, lower_right_id) * reverse_upper[i, k] * reverse_lower[j, l] - end - - # Project from upper left element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, upper_left_id) * reverse_lower[i, k] * reverse_upper[j, l] - end + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - # Project from upper right element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, upper_right_id) * reverse_upper[i, k] * reverse_upper[j, l] - end + # Project from lower left element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, lower_left_id) * + reverse_lower[i, k] * reverse_lower[j, l] + end - # Update value - set_node_vars!(u, acc, equations, dg, i, j, element_id) - end -end + # Project from lower right element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, lower_right_id) * + reverse_upper[i, k] * reverse_lower[j, l] + end + # Project from upper left element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, upper_left_id) * + reverse_lower[i, k] * reverse_upper[j, l] + end -# this method is called when an `ControllerThreeLevel` is constructed -function create_cache(::Type{ControllerThreeLevel}, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DG, cache) + # Project from upper right element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, upper_right_id) * + reverse_upper[i, k] * reverse_upper[j, l] + end - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) + # Update value + set_node_vars!(u, acc, equations, dg, i, j, element_id) + end end -function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{2}, equations, dg::DG, cache) - - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) +# this method is called when a `ControllerThreeLevel` is constructed +function create_cache(::Type{ControllerThreeLevel}, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) end - +function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{2}, + equations, dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) +end end # @muladd diff --git a/src/callbacks_step/amr_dg3d.jl b/src/callbacks_step/amr_dg3d.jl index 44f73547efc..c8abe6fdb05 100644 --- a/src/callbacks_step/amr_dg3d.jl +++ b/src/callbacks_step/amr_dg3d.jl @@ -3,302 +3,310 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Refine elements in the DG solver based on a list of cell_ids that should be refined function refine!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, dg::DGSEM, cache, elements_to_refine) - # Return early if there is nothing to do - if isempty(elements_to_refine) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have refined elements) - reinitialize_containers!(mesh, equations, dg, cache) + # Return early if there is nothing to do + if isempty(elements_to_refine) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have refined elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - return - end - - # Determine for each existing element whether it needs to be refined - needs_refinement = falses(nelements(dg, cache)) - needs_refinement[elements_to_refine] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or refine them - u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - element_id = 1 - for old_element_id in 1:old_n_elements - if needs_refinement[old_element_id] - # Refine element and store solution directly in new data structure - refine_element!(u, element_id, old_u, old_element_id, - adaptor, equations, dg, u_tmp1, u_tmp2) - element_id += 2^ndims(mesh) - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + + # Determine for each existing element whether it needs to be refined + needs_refinement = falses(nelements(dg, cache)) + needs_refinement[elements_to_refine] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or refine them + u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg)) + u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg)) + element_id = 1 + for old_element_id in 1:old_n_elements + if needs_refinement[old_element_id] + # Refine element and store solution directly in new data structure + refine_element!(u, element_id, old_u, old_element_id, + adaptor, equations, dg, u_tmp1, u_tmp2) + element_id += 2^ndims(mesh) + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + # Depending on whether the last element processed above had to be refined or not, + # the counter `element_id` can have two different values at the end. + @assert element_id == + nelements(dg, cache) + + 1||element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # Sanity check + if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 + @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") end - # If everything is correct, we should have processed all elements. - # Depending on whether the last element processed above had to be refined or not, - # the counter `element_id` can have two different values at the end. 
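For reference, the `element_id` bookkeeping above means that each refined element is replaced by `2^ndims(mesh)` children while every other element is copied once; a standalone sketch of the resulting element count (hypothetical helper, not part of this patch):

```julia
# Hypothetical helper, not Trixi.jl API: expected number of elements after
# refinement, given per-element refinement flags. A refined element turns
# into 2^ndims children; all other elements are copied over unchanged.
expected_nelements(flags, ndims) = sum(f -> f ? 2^ndims : 1, flags)

flags = [false, true, false, true]          # refine old elements 2 and 4 (3D)
@assert expected_nelements(flags, 3) == 18  # 2 copied + 2 * 2^3 children
```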
- @assert element_id == nelements(dg, cache) + 1 || element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end - - return nothing -end + return nothing +end # TODO: Taal compare performance of different implementations # Refine solution data u for an element, using L2 projection (interpolation) -function refine_element!(u::AbstractArray{<:Any,5}, element_id, +function refine_element!(u::AbstractArray{<:Any, 5}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg, u_tmp1, u_tmp2) - @unpack forward_upper, forward_lower = adaptor - - # Store new element ids - bottom_lower_left_id = element_id - bottom_lower_right_id = element_id + 1 - bottom_upper_left_id = element_id + 2 - bottom_upper_right_id = element_id + 3 - top_lower_left_id = element_id + 4 - top_lower_right_id = element_id + 5 - top_upper_left_id = element_id + 6 - top_upper_right_id = element_id + 7 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) == nnodes(dg) - @assert size(old_u, 4) == nnodes(dg) - @assert size(old_u, 5) >= old_element_id - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) == nnodes(dg) - @assert size( u, 4) == nnodes(dg) - @assert size( u, 5) >= element_id + 7 - end - - # Interpolate to bottom lower left element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_lower_left_id), forward_lower, forward_lower, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to bottom lower right element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_lower_right_id), forward_upper, forward_lower, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to bottom upper left element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_upper_left_id), forward_lower, forward_upper, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to bottom upper right element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_upper_right_id), forward_upper, forward_upper, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top lower left element - multiply_dimensionwise!( - view(u, :, :, :, :, top_lower_left_id), forward_lower, forward_lower, forward_upper, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top lower right element - multiply_dimensionwise!( - view(u, :, :, :, :, top_lower_right_id), forward_upper, forward_lower, forward_upper, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top upper left element - multiply_dimensionwise!( - view(u, :, :, :, :, top_upper_left_id), forward_lower, forward_upper, forward_upper, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top upper right element - multiply_dimensionwise!( - view(u, :, :, :, :, top_upper_right_id), forward_upper, forward_upper, forward_upper, - view(old_u, :, :, :, :, 
old_element_id), u_tmp1, u_tmp2) - - return nothing -end - + @unpack forward_upper, forward_lower = adaptor + + # Store new element ids + bottom_lower_left_id = element_id + bottom_lower_right_id = element_id + 1 + bottom_upper_left_id = element_id + 2 + bottom_upper_right_id = element_id + 3 + top_lower_left_id = element_id + 4 + top_lower_right_id = element_id + 5 + top_upper_left_id = element_id + 6 + top_upper_right_id = element_id + 7 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) == nnodes(dg) + @assert size(old_u, 4) == nnodes(dg) + @assert size(old_u, 5) >= old_element_id + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) == nnodes(dg) + @assert size(u, 4) == nnodes(dg) + @assert size(u, 5) >= element_id + 7 + end + # Interpolate to bottom lower left element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_lower_left_id), forward_lower, + forward_lower, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to bottom lower right element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_lower_right_id), forward_upper, + forward_lower, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to bottom upper left element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_upper_left_id), forward_lower, + forward_upper, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to bottom upper right element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_upper_right_id), forward_upper, + forward_upper, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top lower left element + multiply_dimensionwise!(view(u, :, :, :, :, top_lower_left_id), forward_lower, + forward_lower, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top lower right element + multiply_dimensionwise!(view(u, :, :, :, :, top_lower_right_id), forward_upper, + forward_lower, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top upper left element + multiply_dimensionwise!(view(u, :, :, :, :, top_upper_left_id), forward_lower, + forward_upper, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top upper right element + multiply_dimensionwise!(view(u, :, :, :, :, top_upper_right_id), forward_upper, + forward_upper, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + return nothing +end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed function coarsen!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, dg::DGSEM, cache, elements_to_remove) - # Return early if there is nothing to do - if isempty(elements_to_remove) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have coarsened elements) - reinitialize_containers!(mesh, equations, dg, cache) - end - return - end - - # Determine for each old element whether it needs to be removed - to_be_removed = falses(nelements(dg, cache)) - to_be_removed[elements_to_remove] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - 
GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or coarsen them - u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - skip = 0 - element_id = 1 - for old_element_id in 1:old_n_elements - # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements - if skip > 0 - skip -= 1 - continue - end - - if to_be_removed[old_element_id] - # If an element is to be removed, sanity check if the following elements - # are also marked - otherwise there would be an error in the way the - # cells/elements are sorted - @assert all(to_be_removed[old_element_id:(old_element_id+2^ndims(mesh)-1)]) "bad cell/element order" - - # Coarsen elements and store solution directly in new data structure - coarsen_elements!(u, element_id, old_u, old_element_id, - adaptor, equations, dg, u_tmp1, u_tmp2) - element_id += 1 - skip = 2^ndims(mesh) - 1 - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_remove) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have coarsened elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - # If everything is correct, we should have processed all elements. - @assert element_id == nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end + # Determine for each old element whether it needs to be removed + to_be_removed = falses(nelements(dg, cache)) + to_be_removed[elements_to_remove] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or coarsen them + u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg)) + u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg)) + skip = 0 + element_id = 1 + for old_element_id in 1:old_n_elements + # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements + if skip > 0 + skip -= 1 + continue + end + + if to_be_removed[old_element_id] + # If an element is to be removed, sanity check if the following elements + # are also marked - otherwise there would be an error in the way the + # cells/elements are sorted + @assert all(to_be_removed[old_element_id:(old_element_id + 2^ndims(mesh) - 1)]) "bad cell/element order" + + # Coarsen elements and store solution directly in new data structure + coarsen_elements!(u, element_id, old_u, old_element_id, + adaptor, equations, dg, u_tmp1, u_tmp2) + element_id += 1 + skip = 2^ndims(mesh) - 1 + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + @assert element_id==nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # Sanity check + if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 + @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") + end - return nothing + return nothing end - # TODO: Taal compare performance of different implementations # Coarsen solution data u for four elements, using L2 projection -function coarsen_elements!(u::AbstractArray{<:Any,5}, element_id, +function coarsen_elements!(u::AbstractArray{<:Any, 5}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg, u_tmp1, u_tmp2) - @unpack reverse_upper, reverse_lower = adaptor - - # Store old element ids - bottom_lower_left_id = old_element_id - bottom_lower_right_id = old_element_id + 1 - bottom_upper_left_id = old_element_id + 2 - bottom_upper_right_id = old_element_id + 3 - top_lower_left_id = old_element_id + 4 - top_lower_right_id = old_element_id + 5 - top_upper_left_id = old_element_id + 6 - top_upper_right_id = old_element_id + 7 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) == nnodes(dg) - @assert size(old_u, 4) == nnodes(dg) - @assert size(old_u, 5) >= old_element_id + 7 - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) == nnodes(dg) - @assert size( u, 4) == nnodes(dg) - @assert size( u, 5) >= element_id - end - - # Project from bottom lower left element - multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_lower, reverse_lower, reverse_lower, - view(old_u, :, :, :, :, bottom_lower_left_id), u_tmp1, 
u_tmp2) - - # Project from bottom lower right element_variables - add_multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_upper, reverse_lower, reverse_lower, - view(old_u, :, :, :, :, bottom_lower_right_id), u_tmp1, u_tmp2) - - # Project from bottom upper left element - add_multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_lower, reverse_upper, reverse_lower, - view(old_u, :, :, :, :, bottom_upper_left_id), u_tmp1, u_tmp2) - - # Project from bottom upper right element - add_multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_upper, reverse_upper, reverse_lower, - view(old_u, :, :, :, :, bottom_upper_right_id), u_tmp1, u_tmp2) - - # Project from top lower left element - add_multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_lower, reverse_lower, reverse_upper, - view(old_u, :, :, :, :, top_lower_left_id), u_tmp1, u_tmp2) - - # Project from top lower right element - add_multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_upper, reverse_lower, reverse_upper, - view(old_u, :, :, :, :, top_lower_right_id), u_tmp1, u_tmp2) - - # Project from top upper left element - add_multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_lower, reverse_upper, reverse_upper, - view(old_u, :, :, :, :, top_upper_left_id), u_tmp1, u_tmp2) - - # Project from top upper right element - add_multiply_dimensionwise!( - view(u, :, :, :, :, element_id), reverse_upper, reverse_upper, reverse_upper, - view(old_u, :, :, :, :, top_upper_right_id), u_tmp1, u_tmp2) - - return nothing -end + @unpack reverse_upper, reverse_lower = adaptor + + # Store old element ids + bottom_lower_left_id = old_element_id + bottom_lower_right_id = old_element_id + 1 + bottom_upper_left_id = old_element_id + 2 + bottom_upper_right_id = old_element_id + 3 + top_lower_left_id = old_element_id + 4 + top_lower_right_id = old_element_id + 5 + top_upper_left_id = old_element_id + 6 + top_upper_right_id = old_element_id + 7 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) == nnodes(dg) + @assert size(old_u, 4) == nnodes(dg) + @assert size(old_u, 5) >= old_element_id + 7 + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) == nnodes(dg) + @assert size(u, 4) == nnodes(dg) + @assert size(u, 5) >= element_id + end + # Project from bottom lower left element + multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower, + reverse_lower, reverse_lower, + view(old_u, :, :, :, :, bottom_lower_left_id), u_tmp1, + u_tmp2) + + # Project from bottom lower right element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper, + reverse_lower, reverse_lower, + view(old_u, :, :, :, :, bottom_lower_right_id), u_tmp1, + u_tmp2) + + # Project from bottom upper left element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower, + reverse_upper, reverse_lower, + view(old_u, :, :, :, :, bottom_upper_left_id), u_tmp1, + u_tmp2) + + # Project from bottom upper right element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper, + reverse_upper, reverse_lower, + view(old_u, :, :, :, :, bottom_upper_right_id), u_tmp1, + u_tmp2) + + # Project from top lower left element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower, + reverse_lower, reverse_upper, + view(old_u, :, :, :, :, 
top_lower_left_id), u_tmp1, + u_tmp2) + + # Project from top lower right element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper, + reverse_lower, reverse_upper, + view(old_u, :, :, :, :, top_lower_right_id), u_tmp1, + u_tmp2) + + # Project from top upper left element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower, + reverse_upper, reverse_upper, + view(old_u, :, :, :, :, top_upper_left_id), u_tmp1, + u_tmp2) + + # Project from top upper right element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper, + reverse_upper, reverse_upper, + view(old_u, :, :, :, :, top_upper_right_id), u_tmp1, + u_tmp2) + + return nothing +end # this method is called when an `ControllerThreeLevel` is constructed function create_cache(::Type{ControllerThreeLevel}, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, dg::DG, cache) - - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) end - - end # @muladd diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index c6a2ee6fb95..2e038401df7 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # TODO: Taal refactor # - analysis_interval part as PeriodicCallback called after a certain amount of simulation time @@ -36,539 +36,557 @@ evaluating the computational performance, such as the total runtime, the perform (time/DOF/rhs!), the time spent in garbage collection (GC), or the current memory usage (alloc'd memory). """ -mutable struct AnalysisCallback{Analyzer, AnalysisIntegrals, InitialStateIntegrals, Cache} - start_time::Float64 - start_time_last_analysis::Float64 - ncalls_rhs_last_analysis::Int - start_gc_time::Float64 - interval::Int - save_analysis::Bool - output_directory::String - analysis_filename::String - analyzer::Analyzer - analysis_errors::Vector{Symbol} - analysis_integrals::AnalysisIntegrals - initial_state_integrals::InitialStateIntegrals - cache::Cache +mutable struct AnalysisCallback{Analyzer, AnalysisIntegrals, InitialStateIntegrals, + Cache} + start_time::Float64 + start_time_last_analysis::Float64 + ncalls_rhs_last_analysis::Int + start_gc_time::Float64 + interval::Int + save_analysis::Bool + output_directory::String + analysis_filename::String + analyzer::Analyzer + analysis_errors::Vector{Symbol} + analysis_integrals::AnalysisIntegrals + initial_state_integrals::InitialStateIntegrals + cache::Cache end - # TODO: Taal bikeshedding, implement a method with less information and the signature # function Base.show(io::IO, analysis_callback::AnalysisCallback) # end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AnalysisCallback}) - @nospecialize cb # reduce precompilation time +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AnalysisCallback}) + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else - analysis_callback = cb.affect! + if get(io, :compact, false) + show(io, cb) + else + analysis_callback = cb.affect! 
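The `multiply_dimensionwise!`/`add_multiply_dimensionwise!` calls above exploit the tensor-product structure of the L2 projection: one 1D operator is applied per coordinate direction, with `u_tmp1`/`u_tmp2` as preallocated temporaries. A naive 2D reference sketch of the same product (for illustration only, not the Trixi.jl implementation):

```julia
# Naive reference version (not the Trixi.jl implementation) of a dimension-wise
# operator application in 2D: out[v, i, j] = Σₖₗ A[i, k] * B[j, l] * u[v, k, l]
function apply_dimensionwise(A, B, u::AbstractArray{<:Real, 3})
    nvars, n1, n2 = size(u)
    out = zeros(nvars, size(A, 1), size(B, 1))
    for j in axes(out, 3), i in axes(out, 2), v in 1:nvars
        for l in 1:n2, k in 1:n1
            out[v, i, j] += A[i, k] * B[j, l] * u[v, k, l]
        end
    end
    return out
end

A, B = rand(3, 4), rand(3, 4)  # e.g. projection from 4 nodes down to 3
u = rand(2, 4, 4)              # 2 variables on a 4×4 tensor-product grid
v = apply_dimensionwise(A, B, u)
@assert size(v) == (2, 3, 3)
```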
- setup = Pair{String,Any}[ - "interval" => analysis_callback.interval, - "analyzer" => analysis_callback.analyzer, - ] - for (idx, error) in enumerate(analysis_callback.analysis_errors) - push!(setup, "│ error " * string(idx) => error) - end - for (idx, integral) in enumerate(analysis_callback.analysis_integrals) - push!(setup, "│ integral " * string(idx) => integral) - end - push!(setup, "save analysis to file" => analysis_callback.save_analysis ? "yes" : "no") - if analysis_callback.save_analysis - push!(setup, "│ filename" => analysis_callback.analysis_filename) - push!(setup, "│ output directory" => abspath(normpath(analysis_callback.output_directory))) + setup = Pair{String, Any}["interval" => analysis_callback.interval, + "analyzer" => analysis_callback.analyzer] + for (idx, error) in enumerate(analysis_callback.analysis_errors) + push!(setup, "│ error " * string(idx) => error) + end + for (idx, integral) in enumerate(analysis_callback.analysis_integrals) + push!(setup, "│ integral " * string(idx) => integral) + end + push!(setup, + "save analysis to file" => analysis_callback.save_analysis ? "yes" : "no") + if analysis_callback.save_analysis + push!(setup, "│ filename" => analysis_callback.analysis_filename) + push!(setup, + "│ output directory" => abspath(normpath(analysis_callback.output_directory))) + end + summary_box(io, "AnalysisCallback", setup) end - summary_box(io, "AnalysisCallback", setup) - end end - function AnalysisCallback(semi::AbstractSemidiscretization; kwargs...) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - AnalysisCallback(mesh, equations, solver, cache; kwargs...) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + AnalysisCallback(mesh, equations, solver, cache; kwargs...) end function AnalysisCallback(mesh, equations::AbstractEquations, solver, cache; - interval=0, - save_analysis=false, - output_directory="out", - analysis_filename="analysis.dat", - extra_analysis_errors=Symbol[], - analysis_errors=union(default_analysis_errors(equations), extra_analysis_errors), - extra_analysis_integrals=(), - analysis_integrals=union(default_analysis_integrals(equations), extra_analysis_integrals), - RealT=real(solver), - uEltype=eltype(cache.elements), + interval = 0, + save_analysis = false, + output_directory = "out", + analysis_filename = "analysis.dat", + extra_analysis_errors = Symbol[], + analysis_errors = union(default_analysis_errors(equations), + extra_analysis_errors), + extra_analysis_integrals = (), + analysis_integrals = union(default_analysis_integrals(equations), + extra_analysis_integrals), + RealT = real(solver), + uEltype = eltype(cache.elements), kwargs...) - # Decide when the callback is activated. - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - condition = (u, t, integrator) -> interval > 0 && ( (integrator.stats.naccept % interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - isfinished(integrator)) - - analyzer = SolutionAnalyzer(solver; kwargs...) 
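The `condition` closure above packs several rules into one expression; restated as a plain function with hypothetical names (not part of this patch):

```julia
# Standalone restatement of the `condition` closure above: fire every
# `interval` accepted steps, skip the case where the very first step was
# rejected (naccept == 0 but iter > 0), and always fire at the end.
function analysis_due(naccept, iter, interval, finished)
    interval > 0 && ((naccept % interval == 0 &&
                      !(naccept == 0 && iter > 0)) || finished)
end

@assert analysis_due(0, 0, 10, false)    # before the first step
@assert !analysis_due(0, 1, 10, false)   # first step was rejected
@assert analysis_due(10, 12, 10, false)  # 10 accepted steps (2 rejected)
@assert analysis_due(13, 15, 10, true)   # end of the simulation
```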
- cache_analysis = create_cache_analysis(analyzer, mesh, equations, solver, cache, RealT, uEltype) - - analysis_callback = AnalysisCallback(0.0, 0.0, 0, 0.0, - interval, save_analysis, output_directory, analysis_filename, - analyzer, - analysis_errors, Tuple(analysis_integrals), - SVector(ntuple(_ -> zero(uEltype), Val(nvariables(equations)))), - cache_analysis) - - DiscreteCallback(condition, analysis_callback, - save_positions=(false,false), - initialize=initialize!) + # Decide when the callback is activated. + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + condition = (u, t, integrator) -> interval > 0 && + ((integrator.stats.naccept % interval == 0 && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + isfinished(integrator)) + + analyzer = SolutionAnalyzer(solver; kwargs...) + cache_analysis = create_cache_analysis(analyzer, mesh, equations, solver, cache, + RealT, uEltype) + + analysis_callback = AnalysisCallback(0.0, 0.0, 0, 0.0, + interval, save_analysis, output_directory, + analysis_filename, + analyzer, + analysis_errors, Tuple(analysis_integrals), + SVector(ntuple(_ -> zero(uEltype), + Val(nvariables(equations)))), + cache_analysis) + + DiscreteCallback(condition, analysis_callback, + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, + integrator) where {Condition, Affect! <: AnalysisCallback} + semi = integrator.p + initial_state_integrals = integrate(u_ode, semi) + _, equations, _, _ = mesh_equations_solver_cache(semi) -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u_ode, t, integrator) where {Condition, Affect!<:AnalysisCallback} - semi = integrator.p - initial_state_integrals = integrate(u_ode, semi) - _, equations, _, _ = mesh_equations_solver_cache(semi) - - analysis_callback = cb.affect! - analysis_callback.initial_state_integrals = initial_state_integrals - @unpack save_analysis, output_directory, analysis_filename, analysis_errors, analysis_integrals = analysis_callback - - if save_analysis && mpi_isroot() - mkpath(output_directory) - - # write header of output file - open(joinpath(output_directory, analysis_filename), "w") do io - @printf(io, "#%-8s", "timestep") - @printf(io, " %-14s", "time") - @printf(io, " %-14s", "dt") - if :l2_error in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "l2_" * v) - end - end - if :linf_error in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "linf_" * v) - end - end - if :conservation_error in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "cons_" * v) - end - end - if :residual in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "res_" * v) - end - end - if :l2_error_primitive in analysis_errors - for v in varnames(cons2prim, equations) - @printf(io, " %-14s", "l2_" * v) - end - end - if :linf_error_primitive in analysis_errors - for v in varnames(cons2prim, equations) - @printf(io, " %-14s", "linf_" * v) + analysis_callback = cb.affect! 
+ analysis_callback.initial_state_integrals = initial_state_integrals + @unpack save_analysis, output_directory, analysis_filename, analysis_errors, analysis_integrals = analysis_callback + + if save_analysis && mpi_isroot() + mkpath(output_directory) + + # write header of output file + open(joinpath(output_directory, analysis_filename), "w") do io + @printf(io, "#%-8s", "timestep") + @printf(io, " %-14s", "time") + @printf(io, " %-14s", "dt") + if :l2_error in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "l2_"*v) + end + end + if :linf_error in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "linf_"*v) + end + end + if :conservation_error in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "cons_"*v) + end + end + if :residual in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "res_"*v) + end + end + if :l2_error_primitive in analysis_errors + for v in varnames(cons2prim, equations) + @printf(io, " %-14s", "l2_"*v) + end + end + if :linf_error_primitive in analysis_errors + for v in varnames(cons2prim, equations) + @printf(io, " %-14s", "linf_"*v) + end + end + + for quantity in analysis_integrals + @printf(io, " %-14s", pretty_form_ascii(quantity)) + end + + println(io) end - end - - for quantity in analysis_integrals - @printf(io, " %-14s", pretty_form_ascii(quantity)) - end - - println(io) end - end - - # Record current time using a high-resolution clock - analysis_callback.start_time = time_ns() + # Record current time using a high-resolution clock + analysis_callback.start_time = time_ns() - # Record current time for performance index computation - analysis_callback.start_time_last_analysis = time_ns() + # Record current time for performance index computation + analysis_callback.start_time_last_analysis = time_ns() - # Record current number of `rhs!` calls for performance index computation - analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) + # Record current number of `rhs!` calls for performance index computation + analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) - # Record total time spent in garbage collection so far using a high-resolution clock - # Note: For details see the actual callback function below - analysis_callback.start_gc_time = Base.gc_time_ns() + # Record total time spent in garbage collection so far using a high-resolution clock + # Note: For details see the actual callback function below + analysis_callback.start_gc_time = Base.gc_time_ns() - analysis_callback(integrator) - return nothing + analysis_callback(integrator) + return nothing end - # TODO: Taal refactor, allow passing an IO object (which could be devnull to avoid cluttering the console) function (analysis_callback::AnalysisCallback)(integrator) - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack dt, t = integrator - iter = integrator.stats.naccept - - # Record performance measurements and compute performance index (PID) - runtime_since_last_analysis = 1.0e-9 * (time_ns() - analysis_callback.start_time_last_analysis) - # PID is an MPI-aware measure of how much time per global degree of freedom (i.e., over all ranks) - # and per `rhs!` evaluation is required. MPI-aware means that it essentially adds up the time - # spent on each MPI rank. 
Thus, in an ideally parallelized program, the PID should be constant - # independent of the number of MPI ranks used, since, e.g., using 4x the number of ranks should - # divide the runtime on each rank by 4. See also the Trixi.jl docs ("Performance" section) for - # more information. - ncalls_rhs_since_last_analysis = (ncalls(semi.performance_counter) - - analysis_callback.ncalls_rhs_last_analysis) - performance_index = runtime_since_last_analysis * mpi_nranks() / (ndofsglobal(mesh, solver, cache) - * ncalls_rhs_since_last_analysis) - - # Compute the total runtime since the analysis callback has been initialized, in seconds - runtime_absolute = 1.0e-9 * (time_ns() - analysis_callback.start_time) - - # Compute the relative runtime as time spent in `rhs!` divided by the number of calls to `rhs!` - # and the number of local degrees of freedom - # OBS! This computation must happen *after* the PID computation above, since `take!(...)` - # will reset the number of calls to `rhs!` - runtime_relative = 1.0e-9 * take!(semi.performance_counter) / ndofs(semi) - - # Compute the total time spent in garbage collection since the analysis callback has been - # initialized, in seconds - # Note: `Base.gc_time_ns()` is not part of the public Julia API but has been available at least - # since Julia 1.6. Should this function be removed without replacement in a future Julia - # release, just delete this analysis quantity from the callback. - # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L83-L84 - gc_time_absolute = 1.0e-9 * (Base.gc_time_ns() - analysis_callback.start_gc_time) - - # Compute the percentage of total time that was spent in garbage collection - gc_time_percentage = gc_time_absolute / runtime_absolute - - # Obtain the current memory usage of the Julia garbage collector, in MiB, i.e., the total size of - # objects in memory that have been allocated by the JIT compiler or the user code. - # Note: `Base.gc_live_bytes()` is not part of the public Julia API but has been available at least - # since Julia 1.6. Should this function be removed without replacement in a future Julia - # release, just delete this analysis quantity from the callback. - # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L86-L97 - memory_use = Base.gc_live_bytes() / 2^20 # bytes -> MiB - - @trixi_timeit timer() "analyze solution" begin - # General information - mpi_println() - mpi_println("─"^100) - # TODO: Taal refactor, polydeg is specific to DGSEM - mpi_println(" Simulation running '", get_name(equations), "' with ", summary(solver)) - mpi_println("─"^100) - mpi_println(" #timesteps: " * @sprintf("% 14d", iter) * - " " * - " run time: " * @sprintf("%10.8e s", runtime_absolute)) - mpi_println(" Δt: " * @sprintf("%10.8e", dt) * - " " * - " └── GC time: " * @sprintf("%10.8e s (%5.3f%%)", gc_time_absolute, gc_time_percentage)) - mpi_println(" sim. 
time: " * @sprintf("%10.8e", t) * - " " * - " time/DOF/rhs!: " * @sprintf("%10.8e s", runtime_relative)) - mpi_println(" " * " " * - " " * - " PID: " * @sprintf("%10.8e s", performance_index)) - mpi_println(" #DOF: " * @sprintf("% 14d", ndofs(semi)) * - " " * - " alloc'd memory: " * @sprintf("%14.3f MiB", memory_use)) - mpi_println(" #elements: " * @sprintf("% 14d", nelements(mesh, solver, cache))) - - # Level information (only show for AMR) - print_amr_information(integrator.opts.callback, mesh, solver, cache) - mpi_println() - - # Open file for appending and store time step and time information - if mpi_isroot() && analysis_callback.save_analysis - io = open(joinpath(analysis_callback.output_directory, analysis_callback.analysis_filename), "a") - @printf(io, "% 9d", iter) - @printf(io, " %10.8e", t) - @printf(io, " %10.8e", dt) - else - io = devnull - end - - # Calculate current time derivative (needed for semidiscrete entropy time derivative, residual, etc.) - du_ode = first(get_tmp_cache(integrator)) - # `integrator.f` is usually just a call to `rhs!` - # However, we want to allow users to modify the ODE RHS outside of Trixi.jl - # and allow us to pass a combined ODE RHS to OrdinaryDiffEq, e.g., for - # hyperbolic-parabolic systems. - @notimeit timer() integrator.f(du_ode, integrator.u, semi, t) - u = wrap_array(integrator.u, mesh, equations, solver, cache) - du = wrap_array(du_ode, mesh, equations, solver, cache) - l2_error, linf_error = analysis_callback(io, du, u, integrator.u, t, semi) - - mpi_println("─"^100) - mpi_println() + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack dt, t = integrator + iter = integrator.stats.naccept + + # Record performance measurements and compute performance index (PID) + runtime_since_last_analysis = 1.0e-9 * (time_ns() - + analysis_callback.start_time_last_analysis) + # PID is an MPI-aware measure of how much time per global degree of freedom (i.e., over all ranks) + # and per `rhs!` evaluation is required. MPI-aware means that it essentially adds up the time + # spent on each MPI rank. Thus, in an ideally parallelized program, the PID should be constant + # independent of the number of MPI ranks used, since, e.g., using 4x the number of ranks should + # divide the runtime on each rank by 4. See also the Trixi.jl docs ("Performance" section) for + # more information. + ncalls_rhs_since_last_analysis = (ncalls(semi.performance_counter) + - + analysis_callback.ncalls_rhs_last_analysis) + performance_index = runtime_since_last_analysis * mpi_nranks() / + (ndofsglobal(mesh, solver, cache) + * + ncalls_rhs_since_last_analysis) + + # Compute the total runtime since the analysis callback has been initialized, in seconds + runtime_absolute = 1.0e-9 * (time_ns() - analysis_callback.start_time) + + # Compute the relative runtime as time spent in `rhs!` divided by the number of calls to `rhs!` + # and the number of local degrees of freedom + # OBS! This computation must happen *after* the PID computation above, since `take!(...)` + # will reset the number of calls to `rhs!` + runtime_relative = 1.0e-9 * take!(semi.performance_counter) / ndofs(semi) + + # Compute the total time spent in garbage collection since the analysis callback has been + # initialized, in seconds + # Note: `Base.gc_time_ns()` is not part of the public Julia API but has been available at least + # since Julia 1.6. 
Should this function be removed without replacement in a future Julia + # release, just delete this analysis quantity from the callback. + # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L83-L84 + gc_time_absolute = 1.0e-9 * (Base.gc_time_ns() - analysis_callback.start_gc_time) + + # Compute the percentage of total time that was spent in garbage collection + gc_time_percentage = gc_time_absolute / runtime_absolute + + # Obtain the current memory usage of the Julia garbage collector, in MiB, i.e., the total size of + # objects in memory that have been allocated by the JIT compiler or the user code. + # Note: `Base.gc_live_bytes()` is not part of the public Julia API but has been available at least + # since Julia 1.6. Should this function be removed without replacement in a future Julia + # release, just delete this analysis quantity from the callback. + # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L86-L97 + memory_use = Base.gc_live_bytes() / 2^20 # bytes -> MiB + + @trixi_timeit timer() "analyze solution" begin + # General information + mpi_println() + mpi_println("─"^100) + mpi_println(" Simulation running '", get_name(equations), "' with ", + summary(solver)) + mpi_println("─"^100) + mpi_println(" #timesteps: " * @sprintf("% 14d", iter) * + " " * + " run time: " * @sprintf("%10.8e s", runtime_absolute)) + mpi_println(" Δt: " * @sprintf("%10.8e", dt) * + " " * + " └── GC time: " * + @sprintf("%10.8e s (%5.3f%%)", gc_time_absolute, gc_time_percentage)) + mpi_println(" sim. time: " * @sprintf("%10.8e", t) * + " " * + " time/DOF/rhs!: " * @sprintf("%10.8e s", runtime_relative)) + mpi_println(" " * " " * + " " * + " PID: " * @sprintf("%10.8e s", performance_index)) + mpi_println(" #DOF: " * @sprintf("% 14d", ndofs(semi)) * + " " * + " alloc'd memory: " * @sprintf("%14.3f MiB", memory_use)) + mpi_println(" #elements: " * + @sprintf("% 14d", nelements(mesh, solver, cache))) + + # Level information (only show for AMR) + print_amr_information(integrator.opts.callback, mesh, solver, cache) + mpi_println() + + # Open file for appending and store time step and time information + if mpi_isroot() && analysis_callback.save_analysis + io = open(joinpath(analysis_callback.output_directory, + analysis_callback.analysis_filename), "a") + @printf(io, "% 9d", iter) + @printf(io, " %10.8e", t) + @printf(io, " %10.8e", dt) + else + io = devnull + end - # Add line break and close analysis file if it was opened - if mpi_isroot() && analysis_callback.save_analysis - # This resolves a possible type instability introduced above, since `io` - # can either be an `IOStream` or `devnull`, but we know that it must be - # an `IOStream here`. - println(io::IOStream) - close(io::IOStream) + # Calculate current time derivative (needed for semidiscrete entropy time derivative, residual, etc.) + du_ode = first(get_tmp_cache(integrator)) + # `integrator.f` is usually just a call to `rhs!` + # However, we want to allow users to modify the ODE RHS outside of Trixi.jl + # and allow us to pass a combined ODE RHS to OrdinaryDiffEq, e.g., for + # hyperbolic-parabolic systems. 
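The performance index (PID) computed above boils down to a one-line formula; a sketch with hypothetical argument names (not part of this patch):

```julia
# PID as in the code above: runtime accumulated over all ranks, divided by
# the global number of degrees of freedom and the number of rhs! evaluations.
pid(runtime_s, nranks, ndofs_global, ncalls_rhs) =
    runtime_s * nranks / (ndofs_global * ncalls_rhs)

# 2 s since the last analysis on each of 4 ranks, 1_048_576 global DOFs,
# 100 rhs! calls -> ≈ 7.6e-8 s per DOF and rhs! evaluation
pid(2.0, 4, 1_048_576, 100)
```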
+ @notimeit timer() integrator.f(du_ode, integrator.u, semi, t) + u = wrap_array(integrator.u, mesh, equations, solver, cache) + du = wrap_array(du_ode, mesh, equations, solver, cache) + l2_error, linf_error = analysis_callback(io, du, u, integrator.u, t, semi) + + mpi_println("─"^100) + mpi_println() + + # Add line break and close analysis file if it was opened + if mpi_isroot() && analysis_callback.save_analysis + # This resolves a possible type instability introduced above, since `io` + # can either be an `IOStream` or `devnull`, but we know that it must be + # an `IOStream` here. + println(io::IOStream) + close(io::IOStream) + end + end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) - # Reset performance measurements - analysis_callback.start_time_last_analysis = time_ns() - analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) + # Reset performance measurements + analysis_callback.start_time_last_analysis = time_ns() + analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) - # Return errors for EOC analysis - return l2_error, linf_error + # Return errors for EOC analysis + return l2_error, linf_error end - # This method is just called internally from `(analysis_callback::AnalysisCallback)(integrator)` # and serves as a function barrier. Additionally, it makes the code easier to profile and optimize. function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi) - @unpack analyzer, analysis_errors, analysis_integrals = analysis_callback - cache_analysis = analysis_callback.cache - _, equations, _, _ = mesh_equations_solver_cache(semi) - - # Calculate and print derived quantities (error norms, entropy etc.) - # Variable names required for L2 error, Linf error, and conservation error - if any(q in analysis_errors for q in - (:l2_error, :linf_error, :conservation_error, :residual)) && mpi_isroot() - print(" Variable: ") - for v in eachvariable(equations) - @printf(" %-14s", varnames(cons2cons, equations)[v]) - end - println() - end - - # Calculate L2/Linf errors, which are also returned - l2_error, linf_error = calc_error_norms(u_ode, t, analyzer, semi, cache_analysis) - - if mpi_isroot() - # L2 error - if :l2_error in analysis_errors - print(" L2 error: ") - for v in eachvariable(equations) - @printf(" % 10.8e", l2_error[v]) - @printf(io, " % 10.8e", l2_error[v]) - end - println() - end - - # Linf error - if :linf_error in analysis_errors - print(" Linf error: ") - for v in eachvariable(equations) - @printf(" % 10.8e", linf_error[v]) - @printf(io, " % 10.8e", linf_error[v]) - end - println() + @unpack analyzer, analysis_errors, analysis_integrals = analysis_callback + cache_analysis = analysis_callback.cache + _, equations, _, _ = mesh_equations_solver_cache(semi) + + # Calculate and print derived quantities (error norms, entropy etc.) 
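As the comments above note, `(analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi)` serves as a function barrier. A generic illustration of the pattern (not Trixi.jl code), using the same `IOStream`/`devnull` situation handled above:

```julia
# Generic sketch of a function barrier: `io` has the union type
# Union{IOStream, Base.DevNull}, so the outer function is not type-stable.
# Passing `io` to an inner function triggers dispatch once and lets Julia
# compile a fully specialized method for each concrete type.
function outer(save_to_file::Bool)
    io = save_to_file ? open(tempname(), "w") : devnull
    result = write_results(io)   # function barrier: dynamic dispatch happens here
    io isa IOStream && close(io)
    return result
end

function write_results(io::IO)   # specialized per concrete type of `io`
    total = 0.0
    for x in 1:100
        total += sqrt(x)
        println(io, total)
    end
    return total
end

outer(false)  # ≈ 671.46, written to devnull
```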
+ # Variable names required for L2 error, Linf error, and conservation error + if any(q in analysis_errors + for q in (:l2_error, :linf_error, :conservation_error, :residual)) && + mpi_isroot() + print(" Variable: ") + for v in eachvariable(equations) + @printf(" %-14s", varnames(cons2cons, equations)[v]) + end + println() end - end - - # Conservation error - if :conservation_error in analysis_errors - @unpack initial_state_integrals = analysis_callback - state_integrals = integrate(u_ode, semi) + # Calculate L2/Linf errors, which are also returned + l2_error, linf_error = calc_error_norms(u_ode, t, analyzer, semi, cache_analysis) if mpi_isroot() - print(" |∑U - ∑U₀|: ") - for v in eachvariable(equations) - err = abs(state_integrals[v] - initial_state_integrals[v]) - @printf(" % 10.8e", err) - @printf(io, " % 10.8e", err) - end - println() + # L2 error + if :l2_error in analysis_errors + print(" L2 error: ") + for v in eachvariable(equations) + @printf(" % 10.8e", l2_error[v]) + @printf(io, " % 10.8e", l2_error[v]) + end + println() + end + + # Linf error + if :linf_error in analysis_errors + print(" Linf error: ") + for v in eachvariable(equations) + @printf(" % 10.8e", linf_error[v]) + @printf(io, " % 10.8e", linf_error[v]) + end + println() + end end - end - - # Residual (defined here as the vector maximum of the absolute values of the time derivatives) - if :residual in analysis_errors - mpi_print(" max(|Uₜ|): ") - for v in eachvariable(equations) - # Calculate maximum absolute value of Uₜ - res = maximum(abs, view(du, v, ..)) - if mpi_isparallel() - # TODO: Debugging, here is a type instability - global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm()) + + # Conservation error + if :conservation_error in analysis_errors + @unpack initial_state_integrals = analysis_callback + state_integrals = integrate(u_ode, semi) + if mpi_isroot() - res::eltype(du) = global_res[] + print(" |∑U - ∑U₀|: ") + for v in eachvariable(equations) + err = abs(state_integrals[v] - initial_state_integrals[v]) + @printf(" % 10.8e", err) + @printf(io, " % 10.8e", err) + end + println() end - end - if mpi_isroot() - @printf(" % 10.8e", res) - @printf(io, " % 10.8e", res) - end end - mpi_println() - end - - # L2/L∞ errors of the primitive variables - if :l2_error_primitive in analysis_errors || :linf_error_primitive in analysis_errors - l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, u_ode, t, analyzer, semi, cache_analysis) - if mpi_isroot() - print(" Variable: ") - for v in eachvariable(equations) - @printf(" %-14s", varnames(cons2prim, equations)[v]) - end - println() - - # L2 error - if :l2_error_primitive in analysis_errors - print(" L2 error prim.: ") + # Residual (defined here as the vector maximum of the absolute values of the time derivatives) + if :residual in analysis_errors + mpi_print(" max(|Uₜ|): ") for v in eachvariable(equations) - @printf("%10.8e ", l2_error_prim[v]) - @printf(io, " % 10.8e", l2_error_prim[v]) + # Calculate maximum absolute value of Uₜ + res = maximum(abs, view(du, v, ..)) + if mpi_isparallel() + # TODO: Debugging, here is a type instability + global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm()) + if mpi_isroot() + res::eltype(du) = global_res[] + end + end + if mpi_isroot() + @printf(" % 10.8e", res) + @printf(io, " % 10.8e", res) + end end - println() - end + mpi_println() + end - # L∞ error - if :linf_error_primitive in analysis_errors - print(" Linf error pri.:") - for v in eachvariable(equations) - @printf("%10.8e ", linf_error_prim[v]) - 
@printf(io, " % 10.8e", linf_error_prim[v]) + # L2/L∞ errors of the primitive variables + if :l2_error_primitive in analysis_errors || + :linf_error_primitive in analysis_errors + l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, u_ode, t, analyzer, + semi, cache_analysis) + + if mpi_isroot() + print(" Variable: ") + for v in eachvariable(equations) + @printf(" %-14s", varnames(cons2prim, equations)[v]) + end + println() + + # L2 error + if :l2_error_primitive in analysis_errors + print(" L2 error prim.: ") + for v in eachvariable(equations) + @printf("%10.8e ", l2_error_prim[v]) + @printf(io, " % 10.8e", l2_error_prim[v]) + end + println() + end + + # L∞ error + if :linf_error_primitive in analysis_errors + print(" Linf error pri.:") + for v in eachvariable(equations) + @printf("%10.8e ", linf_error_prim[v]) + @printf(io, " % 10.8e", linf_error_prim[v]) + end + println() + end end - println() - end end - end - # additional integrals - analyze_integrals(analysis_integrals, io, du, u, t, semi) + # additional integrals + analyze_integrals(analysis_integrals, io, du, u, t, semi) - return l2_error, linf_error + return l2_error, linf_error end - # Print level information only if AMR is enabled function print_amr_information(callbacks, mesh, solver, cache) - # Return early if there is nothing to print - uses_amr(callbacks) || return nothing - - levels = Vector{Int}(undef, nelements(solver, cache)) - min_level = typemax(Int) - max_level = typemin(Int) - for element in eachelement(solver, cache) - current_level = mesh.tree.levels[cache.elements.cell_ids[element]] - levels[element] = current_level - min_level = min(min_level, current_level) - max_level = max(max_level, current_level) - end - - for level = max_level:-1:min_level+1 - mpi_println(" ├── level $level: " * @sprintf("% 14d", count(==(level), levels))) - end - mpi_println(" └── level $min_level: " * @sprintf("% 14d", count(==(min_level), levels))) - - return nothing + # Return early if there is nothing to print + uses_amr(callbacks) || return nothing + + levels = Vector{Int}(undef, nelements(solver, cache)) + min_level = typemax(Int) + max_level = typemin(Int) + for element in eachelement(solver, cache) + current_level = mesh.tree.levels[cache.elements.cell_ids[element]] + levels[element] = current_level + min_level = min(min_level, current_level) + max_level = max(max_level, current_level) + end + + for level in max_level:-1:(min_level + 1) + mpi_println(" ├── level $level: " * + @sprintf("% 14d", count(==(level), levels))) + end + mpi_println(" └── level $min_level: " * + @sprintf("% 14d", count(==(min_level), levels))) + + return nothing end # Print level information only if AMR is enabled function print_amr_information(callbacks, mesh::P4estMesh, solver, cache) - # Return early if there is nothing to print - uses_amr(callbacks) || return nothing + # Return early if there is nothing to print + uses_amr(callbacks) || return nothing - elements_per_level = zeros(P4EST_MAXLEVEL + 1) + elements_per_level = zeros(P4EST_MAXLEVEL + 1) - for tree in unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) - elements_per_level .+= tree.quadrants_per_level - end + for tree in unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) + elements_per_level .+= tree.quadrants_per_level + end - # levels start at zero but Julia's standard indexing starts at 1 - min_level_1 = findfirst(i -> i > 0, elements_per_level) - max_level_1 = findlast(i -> i > 0, elements_per_level) + # levels start at zero but Julia's standard indexing starts at 1 
+ min_level_1 = findfirst(i -> i > 0, elements_per_level) + max_level_1 = findlast(i -> i > 0, elements_per_level) - # Check if there is at least one level with an element - if isnothing(min_level_1) || isnothing(max_level_1) - return nothing - end + # Check if there is at least one level with an element + if isnothing(min_level_1) || isnothing(max_level_1) + return nothing + end - min_level = min_level_1 - 1 - max_level = max_level_1 - 1 + min_level = min_level_1 - 1 + max_level = max_level_1 - 1 - for level = max_level:-1:min_level+1 - mpi_println(" ├── level $level: " * @sprintf("% 14d", elements_per_level[level + 1])) - end - mpi_println(" └── level $min_level: " * @sprintf("% 14d", elements_per_level[min_level + 1])) + for level in max_level:-1:(min_level + 1) + mpi_println(" ├── level $level: " * + @sprintf("% 14d", elements_per_level[level + 1])) + end + mpi_println(" └── level $min_level: " * + @sprintf("% 14d", elements_per_level[min_level + 1])) - return nothing + return nothing end - # Iterate over tuples of analysis integrals in a type-stable way using "lispy tuple programming". -function analyze_integrals(analysis_integrals::NTuple{N,Any}, io, du, u, t, semi) where {N} - - # Extract the first analysis integral and process it; keep the remaining to be processed later - quantity = first(analysis_integrals) - remaining_quantities = Base.tail(analysis_integrals) - - res = analyze(quantity, du, u, t, semi) - if mpi_isroot() - @printf(" %-12s:", pretty_form_utf(quantity)) - @printf(" % 10.8e", res) - @printf(io, " % 10.8e", res) - end - mpi_println() - - # Recursively call this method with the unprocessed integrals - analyze_integrals(remaining_quantities, io, du, u, t, semi) - return nothing +function analyze_integrals(analysis_integrals::NTuple{N, Any}, io, du, u, t, + semi) where {N} + + # Extract the first analysis integral and process it; keep the remaining to be processed later + quantity = first(analysis_integrals) + remaining_quantities = Base.tail(analysis_integrals) + + res = analyze(quantity, du, u, t, semi) + if mpi_isroot() + @printf(" %-12s:", pretty_form_utf(quantity)) + @printf(" % 10.8e", res) + @printf(io, " % 10.8e", res) + end + mpi_println() + + # Recursively call this method with the unprocessed integrals + analyze_integrals(remaining_quantities, io, du, u, t, semi) + return nothing end # terminate the type-stable iteration over tuples function analyze_integrals(analysis_integrals::Tuple{}, io, du, u, t, semi) - nothing + nothing end - # used for error checks and EOC analysis -function (cb::DiscreteCallback{Condition,Affect!})(sol) where {Condition, Affect!<:AnalysisCallback} - analysis_callback = cb.affect! - semi = sol.prob.p - @unpack analyzer = analysis_callback - cache_analysis = analysis_callback.cache - - l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi, cache_analysis) - (; l2=l2_error, linf=linf_error) -end +function (cb::DiscreteCallback{Condition, Affect!})(sol) where {Condition, + Affect! <: + AnalysisCallback} + analysis_callback = cb.affect! 
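The `analyze_integrals` pair above uses what its comment calls "lispy tuple programming": peel off the first quantity with `first`, recurse on `Base.tail`, and terminate on the empty tuple. Because the tuple type shrinks at every call, each step stays type-stable even for heterogeneous quantities. A self-contained sketch of the same pattern (the processed values are invented for illustration):

```julia
# Recursive case: handle the first quantity, then recurse on the rest.
function process(quantities::NTuple{N, Any}) where {N}
    quantity = first(quantities)
    remaining = Base.tail(quantities)
    println("processing ", quantity)
    process(remaining)
    return nothing
end

# Base case: the `Tuple{}` method is more specific than the
# `NTuple{N, Any}` one, so it terminates the recursion.
process(quantities::Tuple{}) = nothing

# Works with a heterogeneous tuple, no runtime branching on its length:
process((sum, :entropy, x -> x^2))
```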
+ semi = sol.prob.p + @unpack analyzer = analysis_callback + cache_analysis = analysis_callback.cache + l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi, + cache_analysis) + (; l2 = l2_error, linf = linf_error) +end # some common analysis_integrals # to support another analysis integral, you can overload # Trixi.analyze, Trixi.pretty_form_utf, Trixi.pretty_form_ascii function analyze(quantity, du, u, t, semi::AbstractSemidiscretization) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - analyze(quantity, du, u, t, mesh, equations, solver, cache) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + analyze(quantity, du, u, t, mesh, equations, solver, cache) end function analyze(quantity, du, u, t, mesh, equations, solver, cache) - integrate(quantity, u, mesh, equations, solver, cache, normalize=true) + integrate(quantity, u, mesh, equations, solver, cache, normalize = true) end pretty_form_utf(quantity) = get_name(quantity) pretty_form_ascii(quantity) = get_name(quantity) - # Special analyze for `SemidiscretizationHyperbolicParabolic` such that # precomputed gradients are available. For now only implemented for the `enstrophy` #!!! warning "Experimental code" # This code is experimental and may be changed or removed in any future release. -function analyze(quantity::typeof(enstrophy), du, u, t, semi::SemidiscretizationHyperbolicParabolic) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - equations_parabolic = semi.equations_parabolic - cache_parabolic = semi.cache_parabolic - analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, cache, cache_parabolic) +function analyze(quantity::typeof(enstrophy), du, u, t, + semi::SemidiscretizationHyperbolicParabolic) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + equations_parabolic = semi.equations_parabolic + cache_parabolic = semi.cache_parabolic + analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, cache, + cache_parabolic) end -function analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, cache, cache_parabolic) - integrate(quantity, u, mesh, equations, equations_parabolic, solver, cache, cache_parabolic, normalize=true) +function analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, + cache, cache_parabolic) + integrate(quantity, u, mesh, equations, equations_parabolic, solver, cache, + cache_parabolic, normalize = true) end - function entropy_timederivative end pretty_form_utf(::typeof(entropy_timederivative)) = "∑∂S/∂U ⋅ Uₜ" pretty_form_ascii(::typeof(entropy_timederivative)) = "dsdu_ut" @@ -604,11 +622,8 @@ pretty_form_ascii(::Val{:linf_divb}) = "linf_divb" pretty_form_utf(::typeof(lake_at_rest_error)) = "∑|H₀-(h+b)|" pretty_form_ascii(::typeof(lake_at_rest_error)) = "|H0-(h+b)|" - - end # @muladd - # specialized implementations specific to some solvers include("analysis_dg1d.jl") include("analysis_dg2d.jl") diff --git a/src/callbacks_step/analysis_dg1d.jl b/src/callbacks_step/analysis_dg1d.jl index e92701dc1fb..d2613c325be 100644 --- a/src/callbacks_step/analysis_dg1d.jl +++ b/src/callbacks_step/analysis_dg1d.jl @@ -3,222 +3,226 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function create_cache_analysis(analyzer, mesh::TreeMesh{1}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) - return (; u_local, x_local) + return (; u_local, x_local) end - function create_cache_analysis(analyzer, mesh::StructuredMesh{1}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) - jacobian_local = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer))) - - return (; u_local, x_local, jacobian_local) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) + jacobian_local = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer))) + + return (; u_local, x_local, jacobian_local) end - function calc_error_norms(func, u, t, analyzer, mesh::StructuredMesh{1}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, x_local, jacobian_local = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) - linf_error = copy(l2_error) - total_volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, element)) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, element))) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i), equations) - l2_error += diff.^2 * (weights[i] * jacobian_local[i]) - linf_error = @. 
max(linf_error, abs(diff)) - total_volume += weights[i] * jacobian_local[i] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, x_local, jacobian_local = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) + linf_error = copy(l2_error) + total_volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, element)) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, element))) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, + equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i), equations) + l2_error += diff .^ 2 * (weights[i] * jacobian_local[i]) + linf_error = @. max(linf_error, abs(diff)) + total_volume += weights[i] * jacobian_local[i] + end end - end - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) - return l2_error, linf_error + return l2_error, linf_error end - function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{1}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, x_local = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, element)) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i), equations) - l2_error += diff.^2 * (weights[i] * volume_jacobian_) - linf_error = @. 
max(linf_error, abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, x_local = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, element)) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, + equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i), equations) + l2_error += diff .^ 2 * (weights[i] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::StructuredMesh{1}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, equations, dg, args...)) - total_volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for i in eachnode(dg) - jacobian_volume = abs(inv(cache.elements.inverse_jacobian[i, element])) - integral += jacobian_volume * weights[i] * func(u, i, element, equations, dg, args...) - total_volume += jacobian_volume * weights[i] + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, equations, dg, args...)) + total_volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for i in eachnode(dg) + jacobian_volume = abs(inv(cache.elements.inverse_jacobian[i, element])) + integral += jacobian_volume * weights[i] * + func(u, i, element, equations, dg, args...) + total_volume += jacobian_volume * weights[i] + end + end + # Normalize with total volume + if normalize + integral = integral / total_volume end - end - # Normalize with total volume - if normalize - integral = integral / total_volume - end - return integral + return integral end - function integrate_via_indices(func::Func, u, mesh::TreeMesh{1}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * func(u, i, element, equations, dg, args...) 
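Both `integrate_via_indices` variants here follow the same quadrature pattern: accumulate `jacobian * weight * integrand` over all nodes and, with `normalize = true`, divide by the total volume. A stripped-down illustration on a single 1D element, using hard-coded three-point Gauss-Legendre nodes and weights on [-1, 1] rather than Trixi.jl's LGL data:

```julia
# Quadrature on one element [center - J, center + J] with constant Jacobian J:
# accumulate J * w_i * f(x_i) and, if requested, divide by the volume.
function element_integral(f, nodes, weights, center, jacobian; normalize = true)
    integral = zero(jacobian)
    total_volume = zero(jacobian)
    for i in eachindex(nodes)
        x = center + jacobian * nodes[i]  # map reference node to the element
        integral += jacobian * weights[i] * f(x)
        total_volume += jacobian * weights[i]
    end
    return normalize ? integral / total_volume : integral
end

nodes = [-sqrt(3 / 5), 0.0, sqrt(3 / 5)]  # 3-point Gauss-Legendre on [-1, 1]
weights = [5 / 9, 8 / 9, 5 / 9]

element_integral(x -> x^2, nodes, weights, 1.0, 1.0; normalize = false)  # 8/3
element_integral(x -> x^2, nodes, weights, 1.0, 1.0)                     # 4/3
```

The two calls integrate x^2 over the element [0, 2]: the raw integral is 8/3, and dividing by the volume 2 gives the mean value 4/3, mirroring the `normalize` keyword above.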
+ args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * + func(u, i, element, equations, dg, args...) + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end - function integrate(func::Func, u, - mesh::Union{TreeMesh{1},StructuredMesh{1}}, - equations, dg::DG, cache; normalize=true) where {Func} - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, element) - return func(u_local, equations) - end + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + equations, dg::DG, cache; normalize = true) where {Func} + integrate_via_indices(u, mesh, equations, dg, cache; + normalize = normalize) do u, i, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, element) + return func(u_local, equations) + end end - function analyze(::typeof(entropy_timederivative), du, u, t, - mesh::Union{TreeMesh{1},StructuredMesh{1}}, equations, dg::DG, cache) - # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, element, equations, dg, du - u_node = get_node_vars(u, equations, dg, i, element) - du_node = get_node_vars(du, equations, dg, i, element) - dot(cons2entropy(u_node, equations), du_node) - end + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, dg::DG, cache) + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ + integrate_via_indices(u, mesh, equations, dg, cache, + du) do u, i, element, equations, dg, du + u_node = get_node_vars(u, equations, dg, i, element) + du_node = get_node_vars(du, equations, dg, i, element) + dot(cons2entropy(u_node, equations), du_node) + end end function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{1}, equations::IdealGlmMhdEquations1D, dg::DG, cache) - integrate_via_indices(u, mesh, equations, dg, cache, dg.basis.derivative_matrix) do u, i, element, equations, dg, derivative_matrix - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += derivative_matrix[i, k] * u[6, k, element] - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, + dg.basis.derivative_matrix) do u, i, element, equations, dg, + derivative_matrix + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += derivative_matrix[i, k] * u[6, k, element] + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{1}, equations::IdealGlmMhdEquations1D, dg::DG, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for i in eachnode(dg) - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += derivative_matrix[i, k] * u[6, k, element] - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the 
divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for i in eachnode(dg) + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += derivative_matrix[i, k] * u[6, k, element] + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end - - end # @muladd diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index 453474675f1..6c74e172e46 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -3,329 +3,356 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function create_cache_analysis(analyzer, mesh::TreeMesh{2}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, x_local, x_tmp1) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, x_local, x_tmp1) end - -function create_cache_analysis(analyzer, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +function create_cache_analysis(analyzer, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. 
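Every `create_cache_analysis` method pre-allocates its interpolation buffers as `StrideArray`s with `StaticInt` sizes, so the dimensions are encoded in the array's type and available to the compiler. A hedged sketch of the pattern outside Trixi.jl; it assumes `StrideArray` and `StaticInt` come from the StrideArrays.jl and Static.jl packages, and the buffer shape is invented:

```julia
using StrideArrays: StrideArray
using Static: StaticInt

# A buffer shaped like a 2-variable solution on 4 analysis nodes. The
# `StaticInt` sizes become part of the array type, which lets the compiler
# unroll and vectorize loops over the buffer.
u_local = StrideArray(undef, Float64, StaticInt(2), StaticInt(4))

fill!(u_local, 0.0)
u_local[1, 1] = 1.0
size(u_local)  # (2, 4)
```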
- u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - jacobian_local = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - jacobian_tmp1 = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + jacobian_local = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + jacobian_tmp1 = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1) end - function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{2}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - # Accumulate L2 error on the element first so that the order of summation is the - # same as in the parallel case to ensure exact equality. This facilitates easier parallel - # development and debugging (see - # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). - for element in eachelement(dg, cache) - # Set up data structures for local element L2 error - l2_error_local = zero(l2_error) - - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_error_local += diff.^2 * (weights[i] * weights[j] * volume_jacobian_) - linf_error = @. 
max(linf_error, abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + # Accumulate L2 error on the element first so that the order of summation is the + # same as in the parallel case to ensure exact equality. This facilitates easier parallel + # development and debugging (see + # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). + for element in eachelement(dg, cache) + # Set up data structures for local element L2 error + l2_error_local = zero(l2_error) + + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_error_local += diff .^ 2 * (weights[i] * weights[j] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end + l2_error += l2_error_local end - l2_error += l2_error_local - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - function calc_error_norms(func, u, t, analyzer, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - total_volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, element)), jacobian_tmp1) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * jacobian_local[i, j]) - linf_error = @. 
max(linf_error, abs(diff)) - total_volume += weights[i] * weights[j] * jacobian_local[i, j] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + total_volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, element)), + jacobian_tmp1) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_error += diff .^ 2 * (weights[i] * weights[j] * jacobian_local[i, j]) + linf_error = @. max(linf_error, abs(diff)) + total_volume += weights[i] * weights[j] * jacobian_local[i, j] + end end - end - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) 
+ end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end - function integrate_via_indices(func::Func, u, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, - dg::DGSEM, cache, args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, equations, dg, args...)) - total_volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) - integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) - total_volume += volume_jacobian * weights[i] * weights[j] + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, + dg::DGSEM, cache, args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, equations, dg, args...)) + total_volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + integral += volume_jacobian * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) + total_volume += volume_jacobian * weights[i] * weights[j] + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume - end + # Normalize with total volume + if normalize + integral = integral / total_volume + end - return integral + return integral end - function integrate(func::Func, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, - equations, dg::DG, cache; normalize=true) where {Func} - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, j, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, j, element) - return func(u_local, equations) - end + mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, + equations, dg::DG, cache; normalize = true) where {Func} + integrate_via_indices(u, mesh, equations, dg, cache; + normalize = normalize) do u, i, j, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, j, element) + return func(u_local, equations) + end end - function analyze(::typeof(entropy_timederivative), du, u, t, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, dg::DG, cache) - # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, element, equations, dg, du - u_node = get_node_vars(u, equations, dg, i, j, element) - du_node = get_node_vars(du, equations, dg, i, j, element) - dot(cons2entropy(u_node, equations), du_node) - end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ + integrate_via_indices(u, mesh, equations, dg, cache, + du) do u, i, j, element, equations, dg, du + u_node = get_node_vars(u, equations, dg, i, j, element) + du_node = get_node_vars(du, equations, dg, i, j, element) + dot(cons2entropy(u_node, equations), du_node) + end 
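The `entropy_timederivative` integrand just above evaluates ∂S/∂u ⋅ ∂u/∂t at each node; integrated over the domain, this is the time derivative of the total entropy, which is what entropy-conservative schemes are checked against. A toy quadrature version for one 1D element (all numbers and the scalar "entropy" are illustrative):

```julia
# Toy quadrature for dS/dt ≈ Σ_i J w_i v(u_i) ⋅ (du/dt)_i with entropy
# variables v = ∂S/∂u; the scalar "entropy" S(u) = u^2 / 2 gives v(u) = u.
cons2entropy_toy(u) = u          # stand-in for the real cons2entropy

w = [1 / 3, 4 / 3, 1 / 3]        # toy weights on 3 nodes (sum to 2)
J = 0.5                          # constant element Jacobian
u = [0.1, 0.4, 0.2]              # nodal solution values
du = [-0.3, 0.1, 0.05]           # nodal time derivatives du/dt

dSdt = sum(J * w[i] * cons2entropy_toy(u[i]) * du[i] for i in eachindex(u))
```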
end - - function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[6, k, j, element] + - derivative_matrix[j, k] * u[7, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[6, k, j, element] + + derivative_matrix[j, k] * u[7, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdMulticomponentEquations2D, dg::DG, cache) - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[5, k, j, element] + - derivative_matrix[j, k] * u[6, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[5, k, j, element] + + derivative_matrix[j, k] * u[6, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:l2_divb}, du, u, t, - mesh::Union{StructuredMesh{2},UnstructuredMesh2D,P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - @unpack contravariant_vectors = cache.elements - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1 and Ja^2 - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - # Compute the transformed divergence - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + - derivative_matrix[j, k] * (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, element] - divb^2 - end |> sqrt + @unpack contravariant_vectors = cache.elements + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1 and Ja^2 + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + # Compute the transformed divergence + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * + (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + + derivative_matrix[j, k] * + (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element])) + end + divb *= 
cache.elements.inverse_jacobian[i, j, element] + divb^2 + end |> sqrt end - function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[6, k, j, element] + - derivative_matrix[j, k] * u[7, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[6, k, j, element] + + derivative_matrix[j, k] * u[7, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdMulticomponentEquations2D, dg::DG, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[5, k, j, element] + - derivative_matrix[j, k] * u[6, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[5, k, j, element] + + derivative_matrix[j, k] * u[6, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end function analyze(::Val{:linf_divb}, du, u, t, - mesh::Union{StructuredMesh{2},UnstructuredMesh2D,P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - @unpack contravariant_vectors = cache.elements - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1 and Ja^2 - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - # Compute the transformed divergence - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + - derivative_matrix[j, k] * (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, 
weights = dg.basis + @unpack contravariant_vectors = cache.elements + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1 and Ja^2 + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + # Compute the transformed divergence + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * + (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + + derivative_matrix[j, k] * + (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element])) + end + divb *= cache.elements.inverse_jacobian[i, j, element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end - - end # @muladd diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl index 2d382604030..a04bf732604 100644 --- a/src/callbacks_step/analysis_dg2d_parallel.jl +++ b/src/callbacks_step/analysis_dg2d_parallel.jl @@ -3,200 +3,209 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function calc_error_norms(func, u, t, analyzer, mesh::ParallelTreeMesh{2}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - l2_errors, linf_errors = calc_error_norms_per_element(func, u, t, analyzer, - mesh, equations, initial_condition, - dg, cache, cache_analysis) - - # Collect local error norms for each element on root process. That way, when aggregating the L2 - # errors, the order of summation is the same as in the serial case to ensure exact equality. - # This facilitates easier parallel development and debugging (see - # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). - # Note that this approach does not scale. - if mpi_isroot() - global_l2_errors = zeros(eltype(l2_errors), cache.mpi_cache.n_elements_global) - global_linf_errors = similar(global_l2_errors) - - n_elements_by_rank = parent(cache.mpi_cache.n_elements_by_rank) # convert OffsetArray to Array - l2_buf = MPI.VBuffer(global_l2_errors, n_elements_by_rank) - linf_buf = MPI.VBuffer(global_linf_errors, n_elements_by_rank) - MPI.Gatherv!(l2_errors, l2_buf, mpi_root(), mpi_comm()) - MPI.Gatherv!(linf_errors, linf_buf, mpi_root(), mpi_comm()) - else - MPI.Gatherv!(l2_errors, nothing, mpi_root(), mpi_comm()) - MPI.Gatherv!(linf_errors, nothing, mpi_root(), mpi_comm()) - end - - # Aggregate element error norms on root process - if mpi_isroot() - # sum(global_l2_errors) does not produce the same result as in the serial case, thus a - # hand-written loop is used - l2_error = zero(eltype(global_l2_errors)) - for error in global_l2_errors - l2_error += error + l2_errors, linf_errors = calc_error_norms_per_element(func, u, t, analyzer, + mesh, equations, + initial_condition, + dg, cache, cache_analysis) + + # Collect local error norms for each element on root process. That way, when aggregating the L2 + # errors, the order of summation is the same as in the serial case to ensure exact equality. + # This facilitates easier parallel development and debugging (see + # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). + # Note that this approach does not scale. 
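The comment above describes gathering per-element error norms on the root rank with `MPI.Gatherv!` and an `MPI.VBuffer` sized by `n_elements_by_rank`; non-root ranks pass `nothing` as the receive buffer. A minimal, self-contained sketch of that pattern with MPI.jl (the rank-dependent element counts are invented; in Trixi.jl they come from the MPI cache):

```julia
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
nranks = MPI.Comm_size(comm)
root = 0

# Each rank owns a different number of "elements" (rank + 1 of them), so
# the receive side needs per-rank counts. Here they are known analytically.
local_errors = fill(Float64(rank), rank + 1)
counts = [r + 1 for r in 0:(nranks - 1)]

if rank == root
    global_errors = zeros(Float64, sum(counts))
    MPI.Gatherv!(local_errors, MPI.VBuffer(global_errors, counts), root, comm)
    @show global_errors
else
    # Non-root ranks pass `nothing` as the receive buffer.
    MPI.Gatherv!(local_errors, nothing, root, comm)
end

MPI.Finalize()
```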
+ if mpi_isroot() + global_l2_errors = zeros(eltype(l2_errors), cache.mpi_cache.n_elements_global) + global_linf_errors = similar(global_l2_errors) + + n_elements_by_rank = parent(cache.mpi_cache.n_elements_by_rank) # convert OffsetArray to Array + l2_buf = MPI.VBuffer(global_l2_errors, n_elements_by_rank) + linf_buf = MPI.VBuffer(global_linf_errors, n_elements_by_rank) + MPI.Gatherv!(l2_errors, l2_buf, mpi_root(), mpi_comm()) + MPI.Gatherv!(linf_errors, linf_buf, mpi_root(), mpi_comm()) + else + MPI.Gatherv!(l2_errors, nothing, mpi_root(), mpi_comm()) + MPI.Gatherv!(linf_errors, nothing, mpi_root(), mpi_comm()) end - linf_error = reduce((x, y) -> max.(x, y), global_linf_errors) - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) - else - l2_error = convert(eltype(l2_errors), NaN * zero(eltype(l2_errors))) - linf_error = convert(eltype(linf_errors), NaN * zero(eltype(linf_errors))) - end + # Aggregate element error norms on root process + if mpi_isroot() + # sum(global_l2_errors) does not produce the same result as in the serial case, thus a + # hand-written loop is used + l2_error = zero(eltype(global_l2_errors)) + for error in global_l2_errors + l2_error += error + end + linf_error = reduce((x, y) -> max.(x, y), global_linf_errors) + + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) + else + l2_error = convert(eltype(l2_errors), NaN * zero(eltype(l2_errors))) + linf_error = convert(eltype(linf_errors), NaN * zero(eltype(linf_errors))) + end - return l2_error, linf_error + return l2_error, linf_error end function calc_error_norms_per_element(func, u, t, analyzer, - mesh::ParallelTreeMesh{2}, equations, initial_condition, + mesh::ParallelTreeMesh{2}, equations, + initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis - - # Set up data structures - T = typeof(zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations))) - l2_errors = zeros(T, nelements(dg, cache)) - linf_errors = copy(l2_errors) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_errors[element] += diff.^2 * (weights[i] * weights[j] * volume_jacobian_) - linf_errors[element] = @. 
max(linf_errors[element], abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis + + # Set up data structures + T = typeof(zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations))) + l2_errors = zeros(T, nelements(dg, cache)) + linf_errors = copy(l2_errors) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_errors[element] += diff .^ 2 * + (weights[i] * weights[j] * volume_jacobian_) + linf_errors[element] = @. max(linf_errors[element], abs(diff)) + end end - end - return l2_errors, linf_errors + return l2_errors, linf_errors end - function calc_error_norms(func, u, t, analyzer, mesh::ParallelP4estMesh{2}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, element)), jacobian_tmp1) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * jacobian_local[i, j]) - linf_error = @. 
max(linf_error, abs(diff)) - volume += weights[i] * weights[j] * jacobian_local[i, j] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, element)), + jacobian_tmp1) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_error += diff .^ 2 * (weights[i] * weights[j] * jacobian_local[i, j]) + linf_error = @. max(linf_error, abs(diff)) + volume += weights[i] * weights[j] * jacobian_local[i, j] + end end - end - - # Accumulate local results on root process - global_l2_error = Vector(l2_error) - global_linf_error = Vector(linf_error) - MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) - MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - l2_error = convert(typeof(l2_error), global_l2_error) - linf_error = convert(typeof(linf_error), global_linf_error) - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) - else - l2_error = convert(typeof(l2_error), NaN * global_l2_error) - linf_error = convert(typeof(linf_error), NaN * global_linf_error) - end - - return l2_error, linf_error -end + # Accumulate local results on root process + global_l2_error = Vector(l2_error) + global_linf_error = Vector(linf_error) + MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + l2_error = convert(typeof(l2_error), global_l2_error) + linf_error = convert(typeof(linf_error), global_linf_error) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) + else + l2_error = convert(typeof(l2_error), NaN * global_l2_error) + linf_error = convert(typeof(linf_error), NaN * global_linf_error) + end + + return l2_error, linf_error +end function integrate_via_indices(func::Func, u, mesh::ParallelTreeMesh{2}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - # call the method accepting a general `mesh::TreeMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - local_integral = invoke(integrate_via_indices, - Tuple{typeof(func), typeof(u), TreeMesh{2}, typeof(equations), - typeof(dg), typeof(cache), map(typeof, args)...}, - func, u, mesh, equations, dg, cache, args..., normalize=normalize) - - # OBS! 
Global results are only calculated on MPI root, all other domains receive `nothing` - global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm()) - if mpi_isroot() - integral = convert(typeof(local_integral), global_integral[]) - else - integral = convert(typeof(local_integral), NaN * local_integral) - end - - return integral -end + args...; normalize = true) where {Func} + # call the method accepting a general `mesh::TreeMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + local_integral = invoke(integrate_via_indices, + Tuple{typeof(func), typeof(u), TreeMesh{2}, + typeof(equations), + typeof(dg), typeof(cache), map(typeof, args)...}, + func, u, mesh, equations, dg, cache, args..., + normalize = normalize) + + # OBS! Global results are only calculated on MPI root, all other domains receive `nothing` + global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm()) + if mpi_isroot() + integral = convert(typeof(local_integral), global_integral[]) + else + integral = convert(typeof(local_integral), NaN * local_integral) + end + return integral +end function integrate_via_indices(func::Func, u, mesh::ParallelP4estMesh{2}, equations, - dg::DGSEM, cache, args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the - # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. - integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, equations, dg, args...)) - volume = zero(real(mesh)) - - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) - integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) - volume += volume_jacobian * weights[i] * weights[j] + dg::DGSEM, cache, args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the + # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. + integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, + equations, dg, args...)) + volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + integral += volume_jacobian * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) 
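The `ParallelTreeMesh{2}` method above uses `invoke` to call the general `TreeMesh{2}` implementation and then layers the MPI reduction on top, instead of duplicating the quadrature loop. A hedged, dependency-free sketch of that dispatch trick (the types and the doubling step are placeholders):

```julia
abstract type AbstractMesh end
struct SerialMesh <: AbstractMesh end
struct ParallelMesh <: AbstractMesh end

# Generic implementation for any mesh type.
measure(mesh::AbstractMesh) = 1.0

function measure(mesh::ParallelMesh)
    # Reuse the generic method via `invoke` instead of copying it ...
    local_measure = invoke(measure, Tuple{AbstractMesh}, mesh)
    # ... then add the parallel-specific step (an MPI reduction in
    # Trixi.jl; doubling here is just a placeholder).
    return 2 * local_measure
end

measure(SerialMesh())    # 1.0
measure(ParallelMesh())  # 2.0
```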
+ volume += volume_jacobian * weights[i] * weights[j] + end end - end - - global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - integral = convert(typeof(integral), global_integral[]) - else - integral = convert(typeof(integral), NaN * integral) - total_volume = volume # non-root processes receive nothing from reduce -> overwrite - end - - # Normalize with total volume - if normalize - integral = integral / total_volume - end - - return integral -end + global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + integral = convert(typeof(integral), global_integral[]) + else + integral = convert(typeof(integral), NaN * integral) + total_volume = volume # non-root processes receive nothing from reduce -> overwrite + end + # Normalize with total volume + if normalize + integral = integral / total_volume + end + + return integral +end end # @muladd diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 77cf1f819ea..76aba813fab 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -3,319 +3,368 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function create_cache_analysis(analyzer, mesh::TreeMesh{3}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - u_tmp2 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - x_tmp2 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. 
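# Minimal illustration of the buffer type allocated below (assuming the
# StrideArrays.jl/Static.jl APIs that Trixi.jl already imports):
#
#     buf = StrideArray(undef, Float64, StaticInt(4), StaticInt(4))
#     buf .= 0  # behaves like a 4×4 Matrix{Float64}, but both extents are
#               # compile-time constants encoded in the type
#
# With statically known sizes, the interpolation kernels that fill these buffers
# can be specialized (unrolled/vectorized) by the compiler.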
+ u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + u_tmp2 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + x_tmp2 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2) end - function create_cache_analysis(analyzer, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - u_tmp2 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - x_tmp2 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - jacobian_local = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - jacobian_tmp1 = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - jacobian_tmp2 = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. 
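# Note (background): in contrast to the TreeMesh{3} method above, curved meshes
# carry a node-dependent Jacobian, so this method additionally allocates the
# scalar-valued `jacobian_local`/`jacobian_tmp1`/`jacobian_tmp2` buffers at the
# end; they receive the Jacobian interpolated to the analysis nodes via
# `multiply_scalar_dimensionwise!` in `calc_error_norms`.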
+ u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + u_tmp2 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + x_tmp2 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + jacobian_local = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + jacobian_tmp1 = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), + StaticInt(nnodes(dg))) + jacobian_tmp2 = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, + jacobian_tmp1, jacobian_tmp2) end - function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{3}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), u_tmp1, u_tmp2) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, :, element), x_tmp1, x_tmp2) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, k), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j, k), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * volume_jacobian_) - linf_error = @. 
max(linf_error, abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), + u_tmp1, u_tmp2) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, :, element), x_tmp1, + x_tmp2) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, + k), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j, k), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - function calc_error_norms(func, u, t, analyzer, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - total_volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), u_tmp1, u_tmp2) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, :, element), x_tmp1, x_tmp2) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, :, element)), jacobian_tmp1, jacobian_tmp2) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, k), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j, k), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) - linf_error = @. 
max(linf_error, abs(diff)) - total_volume += weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + total_volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), + u_tmp1, u_tmp2) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, :, element), x_tmp1, + x_tmp2) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, :, element)), + jacobian_tmp1, jacobian_tmp2) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, + k), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j, k), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) + linf_error = @. max(linf_error, abs(diff)) + total_volume += weights[i] * weights[j] * weights[k] * + jacobian_local[i, j, k] + end end - end - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::TreeMesh{3}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) 
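# Typical call pattern for this function (a usage sketch with a hypothetical
# integrand; `u`, `mesh`, `equations`, `dg`, `cache` as in the surrounding code):
#
#     mean_rho = integrate_via_indices(u, mesh, equations, dg,
#                                      cache) do u, i, j, k, element, equations, dg
#         u[1, i, j, k, element]  # first conserved variable, e.g. density
#     end
#
# With the default `normalize = true`, the accumulated integral is divided by
# `total_volume(mesh)` below, i.e., the result is a volume average.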
+ end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end - function integrate_via_indices(func::Func, u, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) - total_volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) - integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) - total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) + total_volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + integral += volume_jacobian * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) + total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume - end + # Normalize with total volume + if normalize + integral = integral / total_volume + end - return integral + return integral end - function integrate(func::Func, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, - equations, dg::DG, cache; normalize=true) where {Func} - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, j, k, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, j, k, element) - return func(u_local, equations) - end + equations, dg::DG, cache; normalize = true) where {Func} + integrate_via_indices(u, mesh, equations, dg, cache; + normalize = normalize) do u, i, j, k, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, j, k, element) + return func(u_local, equations) + end end - function integrate(func::Func, u, mesh::TreeMesh{3}, equations, equations_parabolic, dg::DGSEM, - cache, cache_parabolic; normalize=true) where {Func} - gradients_x, gradients_y, gradients_z = cache_parabolic.gradients - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, j, k, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, j, k, element) - gradients_1_local = get_node_vars(gradients_x, equations_parabolic, dg, i, j, k, element) - gradients_2_local = get_node_vars(gradients_y, equations_parabolic, dg, i, j, k, element) - gradients_3_local = get_node_vars(gradients_z, equations_parabolic, dg, i, j, k, element) - return func(u_local, (gradients_1_local, gradients_2_local, gradients_3_local), equations_parabolic) - end + cache, cache_parabolic; normalize = true) where {Func} + gradients_x, gradients_y, gradients_z = cache_parabolic.gradients + integrate_via_indices(u, mesh, equations, dg, cache; + 
normalize = normalize) do u, i, j, k, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, j, k, element) + gradients_1_local = get_node_vars(gradients_x, equations_parabolic, dg, i, j, k, + element) + gradients_2_local = get_node_vars(gradients_y, equations_parabolic, dg, i, j, k, + element) + gradients_3_local = get_node_vars(gradients_z, equations_parabolic, dg, i, j, k, + element) + return func(u_local, (gradients_1_local, gradients_2_local, gradients_3_local), + equations_parabolic) + end end - function analyze(::typeof(entropy_timederivative), du, u, t, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, dg::DG, cache) - # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du - u_node = get_node_vars(u, equations, dg, i, j, k, element) - du_node = get_node_vars(du, equations, dg, i, j, k, element) - dot(cons2entropy(u_node, equations), du_node) - end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ + integrate_via_indices(u, mesh, equations, dg, cache, + du) do u, i, j, k, element, equations, dg, du + u_node = get_node_vars(u, equations, dg, i, j, k, element) + du_node = get_node_vars(du, equations, dg, i, j, k, element) + dot(cons2entropy(u_node, equations), du_node) + end end - - function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{3}, equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, k, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * u[6, l, j, k, element] + - derivative_matrix[j, l] * u[7, i, l, k, element] + - derivative_matrix[k, l] * u[8, i, j, l, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, k, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * u[6, l, j, k, element] + + derivative_matrix[j, l] * u[7, i, l, k, element] + + derivative_matrix[k, l] * u[8, i, j, l, element]) + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:l2_divb}, du, u, t, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations::IdealGlmMhdEquations3D, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - @unpack contravariant_vectors = cache.elements - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, k, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - # Compute the transformed divergence - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * (Ja11 * u[6, l, j, k, element] + Ja12 * u[7, l, j, k, element] + Ja13 * u[8, l, j, k, element]) + - derivative_matrix[j, l] * (Ja21 * u[6, i, l, k, element] + Ja22 * u[7, i, l, k, element] + Ja23 * u[8, i, l, k, element]) + - derivative_matrix[k, l] * (Ja31 * u[6, i, j, l, element] + Ja32 * u[7, i, j, l, element] + Ja33 * u[8, i, j, l, 
element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, k, element] - divb^2 - end |> sqrt + @unpack contravariant_vectors = cache.elements + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, k, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, + element) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, + element) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, + element) + # Compute the transformed divergence + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * + (Ja11 * u[6, l, j, k, element] + Ja12 * u[7, l, j, k, element] + + Ja13 * u[8, l, j, k, element]) + + derivative_matrix[j, l] * + (Ja21 * u[6, i, l, k, element] + Ja22 * u[7, i, l, k, element] + + Ja23 * u[8, i, l, k, element]) + + derivative_matrix[k, l] * + (Ja31 * u[6, i, j, l, element] + Ja32 * u[7, i, j, l, element] + + Ja33 * u[8, i, j, l, element])) + end + divb *= cache.elements.inverse_jacobian[i, j, k, element] + divb^2 + end |> sqrt end - function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{3}, equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * u[6, l, j, k, element] + - derivative_matrix[j, l] * u[7, i, l, k, element] + - derivative_matrix[k, l] * u[8, i, j, l, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * u[6, l, j, k, element] + + derivative_matrix[j, l] * u[7, i, l, k, element] + + derivative_matrix[k, l] * u[8, i, j, l, element]) + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end function analyze(::Val{:linf_divb}, du, u, t, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations::IdealGlmMhdEquations3D, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - @unpack contravariant_vectors = cache.elements - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - # Compute the transformed divergence - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * (Ja11 * u[6, l, j, k, 
element] + Ja12 * u[7, l, j, k, element] + Ja13 * u[8, l, j, k, element]) + - derivative_matrix[j, l] * (Ja21 * u[6, i, l, k, element] + Ja22 * u[7, i, l, k, element] + Ja23 * u[8, i, l, k, element]) + - derivative_matrix[k, l] * (Ja31 * u[6, i, j, l, element] + Ja32 * u[7, i, j, l, element] + Ja33 * u[8, i, j, l, element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, k, element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + @unpack contravariant_vectors = cache.elements + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + k, element) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + k, element) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + k, element) + # Compute the transformed divergence + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * (Ja11 * u[6, l, j, k, element] + + Ja12 * u[7, l, j, k, element] + Ja13 * u[8, l, j, k, element]) + + derivative_matrix[j, l] * (Ja21 * u[6, i, l, k, element] + + Ja22 * u[7, i, l, k, element] + Ja23 * u[8, i, l, k, element]) + + derivative_matrix[k, l] * (Ja31 * u[6, i, j, l, element] + + Ja32 * u[7, i, j, l, element] + Ja33 * u[8, i, j, l, element])) + end + divb *= cache.elements.inverse_jacobian[i, j, k, element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end - - end # @muladd diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl index 058960dd63a..d8756d91c9d 100644 --- a/src/callbacks_step/analysis_dg3d_parallel.jl +++ b/src/callbacks_step/analysis_dg3d_parallel.jl @@ -3,95 +3,102 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function calc_error_norms(func, u, t, analyzer, mesh::ParallelP4estMesh{3}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), u_tmp1, u_tmp2) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, :, element), x_tmp1, x_tmp2) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, :, element)), jacobian_tmp1, jacobian_tmp2) - - # Calculate errors at each analysis node - @. 
jacobian_local = abs(jacobian_local) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, k), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j, k), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) - linf_error = @. max(linf_error, abs(diff)) - volume += weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), + u_tmp1, u_tmp2) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, :, element), x_tmp1, + x_tmp2) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, :, element)), + jacobian_tmp1, jacobian_tmp2) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, + k), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j, k), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) + linf_error = @. max(linf_error, abs(diff)) + volume += weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k] + end end - end - # Accumulate local results on root process - global_l2_error = Vector(l2_error) - global_linf_error = Vector(linf_error) - MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) - MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - l2_error = convert(typeof(l2_error), global_l2_error) - linf_error = convert(typeof(linf_error), global_linf_error) - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) - else - l2_error = convert(typeof(l2_error), NaN * global_l2_error) - linf_error = convert(typeof(linf_error), NaN * global_linf_error) - end + # Accumulate local results on root process + global_l2_error = Vector(l2_error) + global_linf_error = Vector(linf_error) + MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + l2_error = convert(typeof(l2_error), global_l2_error) + linf_error = convert(typeof(linf_error), global_linf_error) + # For L2 error, divide by total volume + l2_error = @. 
sqrt(l2_error / total_volume) + else + l2_error = convert(typeof(l2_error), NaN * global_l2_error) + linf_error = convert(typeof(linf_error), NaN * global_linf_error) + end - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::ParallelP4estMesh{3}, equations, - dg::DGSEM, cache, args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the - # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. - integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, 1, equations, dg, args...)) - volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) - integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) - volume += volume_jacobian * weights[i] * weights[j] * weights[k] + dg::DGSEM, cache, args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the + # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. + integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, 1, + equations, dg, args...)) + volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + integral += volume_jacobian * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) + volume += volume_jacobian * weights[i] * weights[j] * weights[k] + end end - end - global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - integral = convert(typeof(integral), global_integral[]) - else - integral = convert(typeof(integral), NaN * integral) - total_volume = volume # non-root processes receive nothing from reduce -> overwrite - end + global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + integral = convert(typeof(integral), global_integral[]) + else + integral = convert(typeof(integral), NaN * integral) + total_volume = volume # non-root processes receive nothing from reduce -> overwrite + end - # Normalize with total volume - if normalize - integral = integral / total_volume - end + # Normalize with total volume + if normalize + integral = integral / total_volume + end - return integral + return integral end - - end # @muladd diff --git a/src/callbacks_step/analysis_dgmulti.jl b/src/callbacks_step/analysis_dgmulti.jl index 2fbd8eda87a..dc294de9e7b 100644 --- a/src/callbacks_step/analysis_dgmulti.jl +++ b/src/callbacks_step/analysis_dgmulti.jl @@ -3,89 +3,89 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function calc_error_norms(func, u, t, analyzer, mesh::DGMultiMesh{NDIMS}, equations, initial_condition, dg::DGMulti{NDIMS}, cache, cache_analysis) where {NDIMS} - rd = dg.basis - md = mesh.md - @unpack u_values = cache - - # interpolate u to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - component_l2_errors = zero(eltype(u_values)) - component_linf_errors = zero(eltype(u_values)) - for i in each_quad_node_global(mesh, dg, cache) - u_exact = initial_condition(SVector(getindex.(md.xyzq, i)), t, equations) - error_at_node = func(u_values[i], equations) - func(u_exact, equations) - component_l2_errors += md.wJq[i] * error_at_node.^2 - component_linf_errors = max.(component_linf_errors, abs.(error_at_node)) - end - total_volume = sum(md.wJq) - return sqrt.(component_l2_errors ./ total_volume), component_linf_errors + rd = dg.basis + md = mesh.md + @unpack u_values = cache + + # interpolate u to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + component_l2_errors = zero(eltype(u_values)) + component_linf_errors = zero(eltype(u_values)) + for i in each_quad_node_global(mesh, dg, cache) + u_exact = initial_condition(SVector(getindex.(md.xyzq, i)), t, equations) + error_at_node = func(u_values[i], equations) - func(u_exact, equations) + component_l2_errors += md.wJq[i] * error_at_node .^ 2 + component_linf_errors = max.(component_linf_errors, abs.(error_at_node)) + end + total_volume = sum(md.wJq) + return sqrt.(component_l2_errors ./ total_volume), component_linf_errors end function integrate(func::Func, u, mesh::DGMultiMesh, - equations, dg::DGMulti, cache; normalize=true) where {Func} - rd = dg.basis - md = mesh.md - @unpack u_values = cache - - # interpolate u to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - integral = sum(md.wJq .* func.(u_values, equations)) - if normalize == true - integral /= sum(md.wJq) - end - return integral + equations, dg::DGMulti, cache; normalize = true) where {Func} + rd = dg.basis + md = mesh.md + @unpack u_values = cache + + # interpolate u to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + integral = sum(md.wJq .* func.(u_values, equations)) + if normalize == true + integral /= sum(md.wJq) + end + return integral end function analyze(::typeof(entropy_timederivative), du, u, t, mesh::DGMultiMesh, equations, dg::DGMulti, cache) - - rd = dg.basis - md = mesh.md - @unpack u_values = cache - - # interpolate u, du to quadrature points - du_values = similar(u_values) # Todo: DGMulti. Can we move this to the analysis cache somehow? - apply_to_each_field(mul_by!(rd.Vq), du_values, du) - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - # compute ∫v(u) * du/dt = ∫dS/dt. We can directly compute v(u) instead of computing the entropy - # projection here, since the RHS will be projected to polynomials of degree N and testing with - # the L2 projection of v(u) would be equivalent to testing with v(u) due to the moment-preserving - # property of the L2 projection. - dS_dt = zero(eltype(first(du))) - for i in Base.OneTo(length(md.wJq)) - dS_dt += dot(cons2entropy(u_values[i], equations), du_values[i]) * md.wJq[i] - end - return dS_dt + rd = dg.basis + md = mesh.md + @unpack u_values = cache + + # interpolate u, du to quadrature points + du_values = similar(u_values) # Todo: DGMulti. Can we move this to the analysis cache somehow? 
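# Background sketch of the quantity assembled here: the semidiscrete entropy rate
#
#     dS/dt ≈ Σ_q wJq[q] * cons2entropy(u_q, equations) ⋅ (du/dt)_q,
#
# where `md.wJq` stores the combined quadrature-weight/Jacobian factors. For an
# entropy-conservative discretization this value should vanish up to roundoff,
# which makes it a useful consistency check in the analysis callback.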
+ apply_to_each_field(mul_by!(rd.Vq), du_values, du) + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + # compute ∫v(u) * du/dt = ∫dS/dt. We can directly compute v(u) instead of computing the entropy + # projection here, since the RHS will be projected to polynomials of degree N and testing with + # the L2 projection of v(u) would be equivalent to testing with v(u) due to the moment-preserving + # property of the L2 projection. + dS_dt = zero(eltype(first(du))) + for i in Base.OneTo(length(md.wJq)) + dS_dt += dot(cons2entropy(u_values[i], equations), du_values[i]) * md.wJq[i] + end + return dS_dt end # This function is used in `analyze(::Val{:l2_divb},...)` and `analyze(::Val{:linf_divb},...)` function compute_local_divergence!(local_divergence, element, vector_field, mesh, dg::DGMulti, cache) - @unpack md = mesh - rd = dg.basis - uEltype = eltype(first(vector_field)) - - fill!(local_divergence, zero(uEltype)) - - # computes dU_i/dx_i = ∑_j dxhat_j/dx_i * dU_i / dxhat_j - # dU_i/dx_i is then accumulated into local_divergence. - # TODO: DGMulti. Extend to curved elements. - for i in eachdim(mesh) - for j in eachdim(mesh) - geometric_scaling = md.rstxyzJ[i, j][1, element] - jth_ref_derivative_matrix = rd.Drst[j] - mul!(local_divergence, jth_ref_derivative_matrix, vector_field[i], geometric_scaling, one(uEltype)) + @unpack md = mesh + rd = dg.basis + uEltype = eltype(first(vector_field)) + + fill!(local_divergence, zero(uEltype)) + + # computes dU_i/dx_i = ∑_j dxhat_j/dx_i * dU_i / dxhat_j + # dU_i/dx_i is then accumulated into local_divergence. + # TODO: DGMulti. Extend to curved elements. + for i in eachdim(mesh) + for j in eachdim(mesh) + geometric_scaling = md.rstxyzJ[i, j][1, element] + jth_ref_derivative_matrix = rd.Drst[j] + mul!(local_divergence, jth_ref_derivative_matrix, vector_field[i], + geometric_scaling, one(uEltype)) + end end - end end get_component(u::StructArray, i::Int) = StructArrays.component(u, i) @@ -94,101 +94,102 @@ get_component(u::AbstractArray{<:SVector}, i::Int) = getindex.(u, i) function analyze(::Val{:l2_divb}, du, u, t, mesh::DGMultiMesh, equations::IdealGlmMhdEquations2D, dg::DGMulti, cache) - @unpack md = mesh - rd = dg.basis - B1 = get_component(u, 6) - B2 = get_component(u, 7) - B = (B1, B2) - - uEltype = eltype(B1) - l2norm_divB = zero(uEltype) - local_divB = zeros(uEltype, size(B1, 1)) - for e in eachelement(mesh, dg, cache) - compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) - - # TODO: DGMulti. Extend to curved elements. - # compute L2 norm squared via J[1, e] * u' * M * u - local_l2norm_divB = md.J[1, e] * dot(local_divB, rd.M, local_divB) - l2norm_divB += local_l2norm_divB - end + @unpack md = mesh + rd = dg.basis + B1 = get_component(u, 6) + B2 = get_component(u, 7) + B = (B1, B2) + + uEltype = eltype(B1) + l2norm_divB = zero(uEltype) + local_divB = zeros(uEltype, size(B1, 1)) + for e in eachelement(mesh, dg, cache) + compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) + + # TODO: DGMulti. Extend to curved elements. 
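# The element contribution below is the discrete L2 norm squared of div(B),
#
#     ||div B||²_e = J_e * d' * M * d,   with d = local_divB and M = rd.M,
#
# evaluated via the three-argument `dot(local_divB, rd.M, local_divB)`; the single
# entry `md.J[1, e]` suffices because only affine (constant-Jacobian) elements are
# supported here, as the TODO above notes.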
+ # compute L2 norm squared via J[1, e] * u' * M * u + local_l2norm_divB = md.J[1, e] * dot(local_divB, rd.M, local_divB) + l2norm_divB += local_l2norm_divB + end - return sqrt(l2norm_divB) + return sqrt(l2norm_divB) end function analyze(::Val{:linf_divb}, du, u, t, mesh::DGMultiMesh, equations::IdealGlmMhdEquations2D, dg::DGMulti, cache) - B1 = get_component(u, 6) - B2 = get_component(u, 7) - B = (B1, B2) - - uEltype = eltype(B1) - linf_divB = zero(uEltype) - local_divB = zeros(uEltype, size(B1, 1)) - for e in eachelement(mesh, dg, cache) - compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) - - # compute maximum norm - linf_divB = max(linf_divB, maximum(abs, local_divB)) - end + B1 = get_component(u, 6) + B2 = get_component(u, 7) + B = (B1, B2) + + uEltype = eltype(B1) + linf_divB = zero(uEltype) + local_divB = zeros(uEltype, size(B1, 1)) + for e in eachelement(mesh, dg, cache) + compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) + + # compute maximum norm + linf_divB = max(linf_divB, maximum(abs, local_divB)) + end - return linf_divB + return linf_divB end function integrate(func::typeof(enstrophy), u, mesh::DGMultiMesh, equations, equations_parabolic::CompressibleNavierStokesDiffusion3D, dg::DGMulti, - cache, cache_parabolic; normalize=true) - - gradients_x, gradients_y, gradients_z = cache_parabolic.gradients - - # allocate local storage for gradients. - # TODO: can we avoid allocating here? - local_gradient_quadrature_values = ntuple(_ -> similar(cache_parabolic.local_u_values_threaded), 3) - - integral = zero(eltype(u)) - for e in eachelement(mesh, dg) - u_quadrature_values = cache_parabolic.local_u_values_threaded[Threads.threadid()] - gradient_x_quadrature_values = local_gradient_quadrature_values[1][Threads.threadid()] - gradient_y_quadrature_values = local_gradient_quadrature_values[2][Threads.threadid()] - gradient_z_quadrature_values = local_gradient_quadrature_values[3][Threads.threadid()] - - # interpolate to quadrature on each element - apply_to_each_field(mul_by(dg.basis.Vq), u_quadrature_values, view(u, :, e)) - apply_to_each_field(mul_by(dg.basis.Vq), gradient_x_quadrature_values, view(gradients_x, :, e)) - apply_to_each_field(mul_by(dg.basis.Vq), gradient_y_quadrature_values, view(gradients_y, :, e)) - apply_to_each_field(mul_by(dg.basis.Vq), gradient_z_quadrature_values, view(gradients_z, :, e)) - - # integrate over the element - for i in eachindex(u_quadrature_values) - gradients_i = SVector(gradient_x_quadrature_values[i], - gradient_y_quadrature_values[i], - gradient_z_quadrature_values[i]) - integral += mesh.md.wJq[i, e] * func(u_quadrature_values[i], gradients_i, equations) + cache, cache_parabolic; normalize = true) + gradients_x, gradients_y, gradients_z = cache_parabolic.gradients + + # allocate local storage for gradients. + # TODO: can we avoid allocating here? 
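# What the `ntuple` below builds (a sketch): one set of per-thread scratch buffers
# per gradient direction, mirroring `cache_parabolic.local_u_values_threaded`, so
#
#     local_gradient_quadrature_values[j][Threads.threadid()]
#
# gives thread-local storage for the j-th gradient interpolated to quadrature
# points, avoiding races between threads in the element loop.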
+ local_gradient_quadrature_values = ntuple(_ -> similar(cache_parabolic.local_u_values_threaded), + 3) + + integral = zero(eltype(u)) + for e in eachelement(mesh, dg) + u_quadrature_values = cache_parabolic.local_u_values_threaded[Threads.threadid()] + gradient_x_quadrature_values = local_gradient_quadrature_values[1][Threads.threadid()] + gradient_y_quadrature_values = local_gradient_quadrature_values[2][Threads.threadid()] + gradient_z_quadrature_values = local_gradient_quadrature_values[3][Threads.threadid()] + + # interpolate to quadrature on each element + apply_to_each_field(mul_by!(dg.basis.Vq), u_quadrature_values, view(u, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_x_quadrature_values, + view(gradients_x, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_y_quadrature_values, + view(gradients_y, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_z_quadrature_values, + view(gradients_z, :, e)) + + # integrate over the element + for i in eachindex(u_quadrature_values) + gradients_i = SVector(gradient_x_quadrature_values[i], + gradient_y_quadrature_values[i], + gradient_z_quadrature_values[i]) + integral += mesh.md.wJq[i, e] * + func(u_quadrature_values[i], gradients_i, equations) + end end - end - return integral + return integral end - function create_cache_analysis(analyzer, mesh::DGMultiMesh, equations, dg::DGMulti, cache, RealT, uEltype) - md = mesh.md - return (; ) + md = mesh.md + return (;) end SolutionAnalyzer(rd::RefElemData) = rd nelements(mesh::DGMultiMesh, ::DGMulti, other_args...) = mesh.md.num_elements function ndofsglobal(mesh::DGMultiMesh, solver::DGMulti, cache) - if mpi_isparallel() - error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI") - else - return ndofs(mesh, solver, cache) - end + if mpi_isparallel() + error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI") + else + return ndofs(mesh, solver, cache) + end end - - end # @muladd diff --git a/src/callbacks_step/averaging.jl b/src/callbacks_step/averaging.jl index 1052efe4bee..8d2dcfeaefe 100644 --- a/src/callbacks_step/averaging.jl +++ b/src/callbacks_step/averaging.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ AveragingCallback(semi::SemidiscretizationHyperbolic, tspan; output_directory="out", @@ -19,112 +19,114 @@ mean speed of sound, mean density, and mean vorticity for each node over the tim that this callback does not support adaptive mesh refinement ([`AMRCallback`](@ref)). """ struct AveragingCallback{TSpan, MeanValues, Cache} - tspan::TSpan - mean_values::MeanValues - cache::Cache - output_directory::String - filename::String + tspan::TSpan + mean_values::MeanValues + cache::Cache + output_directory::String + filename::String end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:AveragingCallback}) - @nospecialize cb # reduce precompilation time - averaging_callback = cb.affect! - @unpack tspan = averaging_callback - - print(io, "AveragingCallback(tspan=", tspan, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AveragingCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else + @nospecialize cb # reduce precompilation time averaging_callback = cb.affect! 
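# Printing background: the user-facing object is a `DiscreteCallback` whose
# `affect!` field stores the `AveragingCallback`, so both `show` methods unwrap
# `cb.affect!` before reporting the averaging time span.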
+ @unpack tspan = averaging_callback - setup = [ - "Start time" => first(averaging_callback.tspan), - "Final time" => last(averaging_callback.tspan) - ] - summary_box(io, "AveragingCallback", setup) - end + print(io, "AveragingCallback(tspan=", tspan, ")") end -function AveragingCallback(semi::SemidiscretizationHyperbolic{<:Any, <:CompressibleEulerEquations2D}, - tspan; output_directory="out", filename="averaging.h5") - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - mean_values = initialize_mean_values(mesh, equations, solver, cache) - cache = create_cache(AveragingCallback, mesh, equations, solver, cache) - - averaging_callback = AveragingCallback(tspan, mean_values, cache, output_directory, filename) - condition = (u, t, integrator) -> first(tspan) <= t <= last(tspan) - - return DiscreteCallback(condition, averaging_callback, save_positions=(false,false), - initialize=initialize!) +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AveragingCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + averaging_callback = cb.affect! + + setup = [ + "Start time" => first(averaging_callback.tspan), + "Final time" => last(averaging_callback.tspan), + ] + summary_box(io, "AveragingCallback", setup) + end end +function AveragingCallback(semi::SemidiscretizationHyperbolic{<:Any, + <:CompressibleEulerEquations2D + }, + tspan; output_directory = "out", filename = "averaging.h5") + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + mean_values = initialize_mean_values(mesh, equations, solver, cache) + cache = create_cache(AveragingCallback, mesh, equations, solver, cache) + + averaging_callback = AveragingCallback(tspan, mean_values, cache, output_directory, + filename) + condition = (u, t, integrator) -> first(tspan) <= t <= last(tspan) + + return DiscreteCallback(condition, averaging_callback, + save_positions = (false, false), + initialize = initialize!) +end -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u_ode, t, integrator) where {Condition, Affect!<:AveragingCallback} - averaging_callback = cb.affect! - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, + integrator) where {Condition, Affect! <: AveragingCallback} + averaging_callback = cb.affect! 
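# Usage sketch (hypothetical `semi`, `ode`, and `alg`; the constructor above
# wires everything together and returns a `DiscreteCallback`):
#
#     averaging = AveragingCallback(semi, (0.0, 1.0); output_directory = "out",
#                                   filename = "averaging.h5")
#     sol = solve(ode, alg; callback = averaging)
#
# OrdinaryDiffEq invokes this `initialize!` once at the start of the integration;
# it computes the initial vorticity that the trapezoidal-rule update needs as its
# "previous" value.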
+ semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) - @trixi_timeit timer() "averaging" initialize_cache!(averaging_callback.cache, u, - mesh, equations, solver, cache) + @trixi_timeit timer() "averaging" initialize_cache!(averaging_callback.cache, u, + mesh, equations, solver, cache) - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end # This function is called during time integration and updates the mean values according to the # trapezoidal rule function (averaging_callback::AveragingCallback)(integrator) - @unpack mean_values = averaging_callback + @unpack mean_values = averaging_callback - u_ode = integrator.u - u_prev_ode = integrator.uprev - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - u_prev = wrap_array(u_prev_ode, mesh, equations, solver, cache) + u_ode = integrator.u + u_prev_ode = integrator.uprev + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + u_prev = wrap_array(u_prev_ode, mesh, equations, solver, cache) - dt = integrator.t - integrator.tprev - tspan = averaging_callback.tspan + dt = integrator.t - integrator.tprev + tspan = averaging_callback.tspan - integration_constant = 0.5 * dt / (tspan[2] - tspan[1]) # .5 due to trapezoidal rule + integration_constant = 0.5 * dt / (tspan[2] - tspan[1]) # .5 due to trapezoidal rule - @trixi_timeit timer() "averaging" calc_mean_values!(mean_values, averaging_callback.cache, - u, u_prev, integration_constant, - mesh, equations, solver, cache) + @trixi_timeit timer() "averaging" calc_mean_values!(mean_values, + averaging_callback.cache, + u, u_prev, integration_constant, + mesh, equations, solver, cache) - # Store mean values in a file if this is the last time step - if isfinished(integrator) - save_averaging_file(averaging_callback, semi) - end + # Store mean values in a file if this is the last time step + if isfinished(integrator) + save_averaging_file(averaging_callback, semi) + end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) - return nothing + return nothing end - function save_averaging_file(averaging_callback, semi::AbstractSemidiscretization) - # Create output directory if it doesn't exist - mkpath(averaging_callback.output_directory) + # Create output directory if it doesn't exist + mkpath(averaging_callback.output_directory) - save_averaging_file(averaging_callback, mesh_equations_solver_cache(semi)...) + save_averaging_file(averaging_callback, mesh_equations_solver_cache(semi)...) end function load_averaging_file(averaging_file, semi::AbstractSemidiscretization) - load_averaging_file(averaging_file, mesh_equations_solver_cache(semi)...) + load_averaging_file(averaging_file, mesh_equations_solver_cache(semi)...) end - include("averaging_dg.jl") include("averaging_dg2d.jl") - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/callbacks_step/averaging_dg.jl b/src/callbacks_step/averaging_dg.jl index c73b982b093..ca6b839f457 100644 --- a/src/callbacks_step/averaging_dg.jl +++ b/src/callbacks_step/averaging_dg.jl @@ -3,49 +3,49 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - -function save_averaging_file(averaging_callback, mesh::TreeMesh, equations, dg::DGSEM, cache) - @unpack output_directory, filename, mean_values = averaging_callback - h5open(joinpath(output_directory, filename), "w") do file - # Add context information - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_elements"] = nelements(dg, cache) - - # Store all mean variables as multi-dimensional arrays - for field in fieldnames(typeof(mean_values)) - name = string(field) - data = getfield(mean_values, field) - file[name] = data +#! format: noindent + +function save_averaging_file(averaging_callback, mesh::TreeMesh, equations, dg::DGSEM, + cache) + @unpack output_directory, filename, mean_values = averaging_callback + h5open(joinpath(output_directory, filename), "w") do file + # Add context information + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_elements"] = nelements(dg, cache) + + # Store all mean variables as multi-dimensional arrays + for field in fieldnames(typeof(mean_values)) + name = string(field) + data = getfield(mean_values, field) + file[name] = data + end end - end - return filename + return filename end +function load_averaging_file(averaging_file, mesh::TreeMesh, equations, dg::DGSEM, + cache) + # Read and check mesh and solver info + h5open(averaging_file, "r") do file + n_dims = read(attributes(file)["ndims"]) + n_nodes = read(attributes(file)["polydeg"]) + 1 + n_elements = read(attributes(file)["n_elements"]) + + @assert n_dims==ndims(mesh) "ndims differs from value in averaging file" + @assert n_nodes - 1==polydeg(dg) "polynomial degree in solver differs from value in averaging file" + @assert n_elements==nelements(dg, cache) "nelements in solver differs from value in averaging file" + end -function load_averaging_file(averaging_file, mesh::TreeMesh, equations, dg::DGSEM, cache) - # Read and check mesh and solver info - h5open(averaging_file, "r") do file - n_dims = read(attributes(file)["ndims"]) - n_nodes = read(attributes(file)["polydeg"]) + 1 - n_elements = read(attributes(file)["n_elements"]) - - @assert n_dims == ndims(mesh) "ndims differs from value in averaging file" - @assert n_nodes - 1 == polydeg(dg) "polynomial degree in solver differs from value in averaging file" - @assert n_elements == nelements(dg, cache) "nelements in solver differs from value in averaging file" - end - - # Read and return mean values - v_mean, c_mean, rho_mean, vorticity_mean = h5open(averaging_file, "r") do file - return read(file["v_mean"]), - read(file["c_mean"]), - read(file["rho_mean"]), - read(file["vorticity_mean"]) - end + # Read and return mean values + v_mean, c_mean, rho_mean, vorticity_mean = h5open(averaging_file, "r") do file + return read(file["v_mean"]), + read(file["c_mean"]), + read(file["rho_mean"]), + read(file["vorticity_mean"]) + end - return (; v_mean, c_mean, rho_mean, vorticity_mean) + return (; v_mean, c_mean, rho_mean, vorticity_mean) end - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/callbacks_step/averaging_dg2d.jl b/src/callbacks_step/averaging_dg2d.jl index 70eafcb29e2..959a5655d96 100644 --- a/src/callbacks_step/averaging_dg2d.jl +++ b/src/callbacks_step/averaging_dg2d.jl @@ -3,75 +3,84 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Create arrays with DGSEM-specific structure to store the mean values and set them all to 0 -function initialize_mean_values(mesh::TreeMesh{2}, equations::AbstractCompressibleEulerEquations{2}, +function initialize_mean_values(mesh::TreeMesh{2}, + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - uEltype = eltype(cache.elements) - v_mean = zeros(uEltype, (ndims(equations), nnodes(dg), nnodes(dg), nelements(cache.elements))) - c_mean = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) - rho_mean = zeros(uEltype, size(c_mean)) - vorticity_mean = zeros(uEltype, size(c_mean)) - - return (; v_mean, c_mean, rho_mean, vorticity_mean) + uEltype = eltype(cache.elements) + v_mean = zeros(uEltype, + (ndims(equations), nnodes(dg), nnodes(dg), + nelements(cache.elements))) + c_mean = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) + rho_mean = zeros(uEltype, size(c_mean)) + vorticity_mean = zeros(uEltype, size(c_mean)) + + return (; v_mean, c_mean, rho_mean, vorticity_mean) end # Create cache which holds the vorticity for the previous time step. This is needed due to the # trapezoidal rule function create_cache(::Type{AveragingCallback}, mesh::TreeMesh{2}, - equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - # Cache vorticity from previous time step - uEltype = eltype(cache.elements) - vorticity_prev = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) - return (; vorticity_prev) + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, + cache) + # Cache vorticity from previous time step + uEltype = eltype(cache.elements) + vorticity_prev = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) + return (; vorticity_prev) end # Calculate vorticity for the initial solution and store it in the cache function initialize_cache!(averaging_callback_cache, u, - mesh::TreeMesh{2}, equations::AbstractCompressibleEulerEquations{2}, + mesh::TreeMesh{2}, + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - @unpack vorticity_prev = averaging_callback_cache + @unpack vorticity_prev = averaging_callback_cache - # Calculate vorticity for initial solution - calc_vorticity!(vorticity_prev, u, mesh, equations, dg, cache) + # Calculate vorticity for initial solution + calc_vorticity!(vorticity_prev, u, mesh, equations, dg, cache) - return nothing + return nothing end - # Update mean values using the trapezoidal rule -function calc_mean_values!(mean_values, averaging_callback_cache, u, u_prev, integration_constant, - mesh::TreeMesh{2}, equations::AbstractCompressibleEulerEquations{2}, +function calc_mean_values!(mean_values, averaging_callback_cache, u, u_prev, + integration_constant, + mesh::TreeMesh{2}, + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - @unpack v_mean, c_mean, rho_mean, vorticity_mean = mean_values - @unpack vorticity_prev = averaging_callback_cache - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - vorticity = calc_vorticity_node(u, mesh, equations, dg, cache, i, j, element) - vorticity_prev_node = vorticity_prev[i, j, element] - vorticity_prev[i, j, element] = vorticity # Cache current vorticity for the next time step - - u_node_prim = cons2prim(get_node_vars(u, equations, dg, i, j, element), equations) - u_prev_node_prim = cons2prim(get_node_vars(u_prev, equations, dg, i, j, element), equations) - - rho, v1, v2, p = u_node_prim - rho_prev, v1_prev, v2_prev, p_prev = u_prev_node_prim - - c 
= sqrt(equations.gamma * p / rho)
-      c_prev = sqrt(equations.gamma * p_prev / rho_prev)
-
-      # Calculate the contribution to the mean values using the trapezoidal rule
-      vorticity_mean[i, j, element] += integration_constant * (vorticity_prev_node + vorticity)
-      v_mean[1, i, j, element] += integration_constant * (v1_prev + v1)
-      v_mean[2, i, j, element] += integration_constant * (v2_prev + v2)
-      c_mean[i, j, element] += integration_constant * (c_prev + c)
-      rho_mean[i, j, element] += integration_constant * (rho_prev + rho)
+    @unpack v_mean, c_mean, rho_mean, vorticity_mean = mean_values
+    @unpack vorticity_prev = averaging_callback_cache
+
+    @threaded for element in eachelement(dg, cache)
+        for j in eachnode(dg), i in eachnode(dg)
+            vorticity = calc_vorticity_node(u, mesh, equations, dg, cache, i, j,
+                                            element)
+            vorticity_prev_node = vorticity_prev[i, j, element]
+            vorticity_prev[i, j, element] = vorticity # Cache current vorticity for the next time step
+
+            u_node_prim = cons2prim(get_node_vars(u, equations, dg, i, j, element),
+                                    equations)
+            u_prev_node_prim = cons2prim(get_node_vars(u_prev, equations, dg, i, j,
+                                                       element), equations)
+
+            rho, v1, v2, p = u_node_prim
+            rho_prev, v1_prev, v2_prev, p_prev = u_prev_node_prim
+
+            c = sqrt(equations.gamma * p / rho)
+            c_prev = sqrt(equations.gamma * p_prev / rho_prev)
+
+            # Calculate the contribution to the mean values using the trapezoidal rule
+            vorticity_mean[i, j, element] += integration_constant *
+                                             (vorticity_prev_node + vorticity)
+            v_mean[1, i, j, element] += integration_constant * (v1_prev + v1)
+            v_mean[2, i, j, element] += integration_constant * (v2_prev + v2)
+            c_mean[i, j, element] += integration_constant * (c_prev + c)
+            rho_mean[i, j, element] += integration_constant * (rho_prev + rho)
+        end
    end
-  end

-  return nothing
+    return nothing
end
-
-end # @muladd
\ No newline at end of file
+end # @muladd
diff --git a/src/callbacks_step/callbacks_step.jl b/src/callbacks_step/callbacks_step.jl
index 0b2c4ef4d5f..09d197bf225 100644
--- a/src/callbacks_step/callbacks_step.jl
+++ b/src/callbacks_step/callbacks_step.jl
@@ -3,32 +3,33 @@
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
-
+#! format: noindent

# overload this function for specific callbacks which use element variables
# that should be saved
-get_element_variables!(element_variables, u, mesh, equations, solver, cache,
-                       callback; kwargs...) = nothing
+function get_element_variables!(element_variables, u, mesh, equations, solver, cache,
+                                callback; kwargs...)
+    nothing
+end

@inline function get_element_variables!(element_variables, u_ode,
-                                        semi::AbstractSemidiscretization, cb::DiscreteCallback;
+                                        semi::AbstractSemidiscretization,
+                                        cb::DiscreteCallback;
                                        kwargs...)
-  mesh, equations, solver, cache = mesh_equations_solver_cache(semi)
-  u = wrap_array(u_ode, mesh, equations, solver, cache)
-  get_element_variables!(element_variables, u, mesh, equations, solver, cache,
-                         cb.affect!; kwargs...)
+    mesh, equations, solver, cache = mesh_equations_solver_cache(semi)
+    u = wrap_array(u_ode, mesh, equations, solver, cache)
+    get_element_variables!(element_variables, u, mesh, equations, solver, cache,
+                           cb.affect!; kwargs...)
end - @inline function isfinished(integrator) - # Checking for floating point equality is OK here as `DifferentialEquations.jl` - # sets the time exactly to the final time in the last iteration - return integrator.t == last(integrator.sol.prob.tspan) || - isempty(integrator.opts.tstops) || - integrator.iter == integrator.opts.maxiters + # Checking for floating point equality is OK here as `DifferentialEquations.jl` + # sets the time exactly to the final time in the last iteration + return integrator.t == last(integrator.sol.prob.tspan) || + isempty(integrator.opts.tstops) || + integrator.iter == integrator.opts.maxiters end - # `include` callback definitions in the order that we currently prefer # when combining them into a `CallbackSet` which is called *after* a complete step # The motivation is as follows: The first callbacks belong to the current time step iteration: @@ -64,12 +65,10 @@ include("glm_speed.jl") include("lbm_collision.jl") include("euler_acoustics_coupling.jl") - # The `TrivialCallback` purposely does nothing: It allows to quickly disable specific callbacks # when using `trixi_include` or `test_trixi_include` include("trivial.jl") # DGMulti callbacks include("analysis_dgmulti.jl") - end # @muladd diff --git a/src/callbacks_step/euler_acoustics_coupling.jl b/src/callbacks_step/euler_acoustics_coupling.jl index 8847fc62b23..ea33175d0c5 100644 --- a/src/callbacks_step/euler_acoustics_coupling.jl +++ b/src/callbacks_step/euler_acoustics_coupling.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" EulerAcousticsCouplingCallback @@ -34,34 +34,37 @@ the [`AveragingCallback`](@ref). A direct-hybrid method for aeroacoustic analysis [DOI: 10.18154/RWTH-2017-04082](https://doi.org/10.18154/RWTH-2017-04082) """ -mutable struct EulerAcousticsCouplingCallback{RealT<:Real, MeanValues, IntegratorEuler} - stepsize_callback_acoustics::StepsizeCallback{RealT} - stepsize_callback_euler::StepsizeCallback{RealT} - mean_values::MeanValues - integrator_euler::IntegratorEuler +mutable struct EulerAcousticsCouplingCallback{RealT <: Real, MeanValues, IntegratorEuler + } + stepsize_callback_acoustics::StepsizeCallback{RealT} + stepsize_callback_euler::StepsizeCallback{RealT} + mean_values::MeanValues + integrator_euler::IntegratorEuler end +function Base.show(io::IO, + cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) + @nospecialize cb # reduce precompilation time + euler_acoustics_coupling = cb.affect! -function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) - @nospecialize cb # reduce precompilation time - euler_acoustics_coupling = cb.affect! - - print(io, "EulerAcousticsCouplingCallback(") - print(io, euler_acoustics_coupling.stepsize_callback_acoustics) - print(io, ", ", euler_acoustics_coupling.stepsize_callback_euler, ")") + print(io, "EulerAcousticsCouplingCallback(") + print(io, euler_acoustics_coupling.stepsize_callback_acoustics) + print(io, ", ", euler_acoustics_coupling.stepsize_callback_euler, ")") end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) - @nospecialize cb # reduce precompilation time - euler_acoustics_coupling = cb.affect! 
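
[Editor's note, not part of the patch] To illustrate the ordering convention described in the comment above, here is a hypothetical elixir fragment; `semi`, the intervals, and the CFL number are placeholders, not values taken from this repository:

# Callbacks acting on the current step (diagnostics, I/O) come before the one
# preparing the next step (step size control), matching the order above.
summary_callback = SummaryCallback()
analysis_callback = AnalysisCallback(semi, interval = 100)
save_solution = SaveSolutionCallback(interval = 100)
stepsize_callback = StepsizeCallback(cfl = 1.0)

callbacks = CallbackSet(summary_callback, analysis_callback,
                        save_solution, stepsize_callback)
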
- - summary_header(io, "EulerAcousticsCouplingCallback") - summary_line(io, "acoustics StepsizeCallback", euler_acoustics_coupling.stepsize_callback_acoustics) - summary_line(io, "Euler StepsizeCallback", euler_acoustics_coupling.stepsize_callback_euler) - summary_footer(io) +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) + @nospecialize cb # reduce precompilation time + euler_acoustics_coupling = cb.affect! + + summary_header(io, "EulerAcousticsCouplingCallback") + summary_line(io, "acoustics StepsizeCallback", + euler_acoustics_coupling.stepsize_callback_acoustics) + summary_line(io, "Euler StepsizeCallback", + euler_acoustics_coupling.stepsize_callback_euler) + summary_footer(io) end - """ EulerAcousticsCouplingCallback(ode_euler, averaging_callback::DiscreteCallback{<:Any, <:AveragingCallback}, @@ -81,12 +84,16 @@ The mean values for the acoustic perturbation equations are read from `averaging (see [`AveragingCallback`](@ref)). """ function EulerAcousticsCouplingCallback(ode_euler, - averaging_callback::DiscreteCallback{<:Any, <:AveragingCallback}, - alg, cfl_acoustics::Real, cfl_euler::Real; kwargs...) - @unpack mean_values = averaging_callback.affect! - - return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, cfl_euler; + averaging_callback::DiscreteCallback{<:Any, + <:AveragingCallback + }, + alg, cfl_acoustics::Real, cfl_euler::Real; kwargs...) + @unpack mean_values = averaging_callback.affect! + + return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, + cfl_euler; + kwargs...) end """ @@ -108,99 +115,106 @@ The mean values for the acoustic perturbation equations are read from `averaging """ function EulerAcousticsCouplingCallback(ode_euler, averaging_file::AbstractString, alg, cfl_acoustics::Real, cfl_euler::Real; kwargs...) - semi_euler = ode_euler.p - mean_values = load_averaging_file(averaging_file, semi_euler) + semi_euler = ode_euler.p + mean_values = load_averaging_file(averaging_file, semi_euler) - return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, cfl_euler; - kwargs...) + return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, + cfl_euler; + kwargs...) end -function EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, cfl_euler; +function EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, + cfl_euler; kwargs...) - # Set up ODE Integrator for Euler equations - integrator_euler = init(ode_euler, alg, save_everystep=false, dt=1.0; kwargs...) # dt will be overwritten - - euler_acoustics_coupling = EulerAcousticsCouplingCallback{typeof(cfl_acoustics), - typeof(mean_values), - typeof(integrator_euler)}( - StepsizeCallback(cfl_acoustics), StepsizeCallback(cfl_euler), mean_values, integrator_euler) - condition = (u, t, integrator) -> true - - return DiscreteCallback(condition, euler_acoustics_coupling, save_positions=(false, false), - initialize=initialize!) + # Set up ODE Integrator for Euler equations + integrator_euler = init(ode_euler, alg, save_everystep = false, dt = 1.0; kwargs...) 
# dt will be overwritten + + euler_acoustics_coupling = EulerAcousticsCouplingCallback{typeof(cfl_acoustics), + typeof(mean_values), + typeof(integrator_euler)}(StepsizeCallback(cfl_acoustics), + StepsizeCallback(cfl_euler), + mean_values, + integrator_euler) + condition = (u, t, integrator) -> true + + return DiscreteCallback(condition, euler_acoustics_coupling, + save_positions = (false, false), + initialize = initialize!) end - # This is called before the main loop and initializes the mean values in u_ode -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u_ode, t, integrator_acoustics) where {Condition, Affect!<:EulerAcousticsCouplingCallback} - euler_acoustics_coupling = cb.affect! - semi = integrator_acoustics.p - @unpack semi_acoustics = semi - - # Initialize mean values in u_ode - u_acoustics = wrap_array(u_ode, semi_acoustics) - @unpack mean_values = euler_acoustics_coupling - @views @. u_acoustics[4:5, .., :] = mean_values.v_mean - @views @. u_acoustics[6, .., :] = mean_values.c_mean - @views @. u_acoustics[7, .., :] = mean_values.rho_mean - - # Adjust stepsize, advance the flow solver by one time step - cb.affect!(integrator_acoustics) - - return nothing +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, + integrator_acoustics) where {Condition, + Affect! <: + EulerAcousticsCouplingCallback} + euler_acoustics_coupling = cb.affect! + semi = integrator_acoustics.p + @unpack semi_acoustics = semi + + # Initialize mean values in u_ode + u_acoustics = wrap_array(u_ode, semi_acoustics) + @unpack mean_values = euler_acoustics_coupling + @views @. u_acoustics[4:5, .., :] = mean_values.v_mean + @views @. u_acoustics[6, .., :] = mean_values.c_mean + @views @. u_acoustics[7, .., :] = mean_values.rho_mean + + # Adjust stepsize, advance the flow solver by one time step + cb.affect!(integrator_acoustics) + + return nothing end - # This function is called at the end of every time step and advances the Euler solution by one # time step, manages the time stepsize for both the acoustics and Euler solvers and calculates the # acoustic sources for the next acoustics time step function (euler_acoustics_coupling::EulerAcousticsCouplingCallback)(integrator_acoustics) - @unpack stepsize_callback_acoustics, stepsize_callback_euler, integrator_euler = euler_acoustics_coupling - - @assert integrator_acoustics.t == integrator_euler.t - - # Use the minimum of the acoustics and Euler stepsizes for both solvers - stepsize_callback_acoustics(integrator_acoustics) - stepsize_callback_euler(integrator_euler) - dt = min(get_proposed_dt(integrator_acoustics), get_proposed_dt(integrator_euler)) - - set_proposed_dt!(integrator_acoustics, dt) - integrator_acoustics.opts.dtmax = dt - integrator_acoustics.dtcache = dt - - set_proposed_dt!(integrator_euler, dt) - integrator_euler.opts.dtmax = dt - integrator_euler.dtcache = dt - - # Advance the Euler solution by one step and check for errors - if !isfinished(integrator_euler) - @trixi_timeit timer() "Euler solver" step!(integrator_euler) - return_code = check_error(integrator_euler) - if !(SciMLBase.successful_retcode(return_code) || - return_code != SciMLBase.ReturnCode.Default) - error("Error during compressible Euler time integration. 
Received return code $(return_code)")
+    @unpack stepsize_callback_acoustics, stepsize_callback_euler, integrator_euler = euler_acoustics_coupling
+
+    @assert integrator_acoustics.t == integrator_euler.t
+
+    # Use the minimum of the acoustics and Euler stepsizes for both solvers
+    stepsize_callback_acoustics(integrator_acoustics)
+    stepsize_callback_euler(integrator_euler)
+    dt = min(get_proposed_dt(integrator_acoustics), get_proposed_dt(integrator_euler))
+
+    set_proposed_dt!(integrator_acoustics, dt)
+    integrator_acoustics.opts.dtmax = dt
+    integrator_acoustics.dtcache = dt
+
+    set_proposed_dt!(integrator_euler, dt)
+    integrator_euler.opts.dtmax = dt
+    integrator_euler.dtcache = dt
+
+    # Advance the Euler solution by one step and check for errors
+    if !isfinished(integrator_euler)
+        @trixi_timeit timer() "Euler solver" step!(integrator_euler)
+        return_code = check_error(integrator_euler)
+        if !(SciMLBase.successful_retcode(return_code) ||
+             return_code != SciMLBase.ReturnCode.Default)
+            error("Error during compressible Euler time integration. Received return code $(return_code)")
+        end
    end
-  end

-  # Calculate acoustic sources based on linearized lamb vector
-  semi = integrator_acoustics.p
-  semi_euler = integrator_euler.p
-  u_acoustics = wrap_array(integrator_acoustics.u, semi)
-  u_euler = wrap_array(integrator_euler.u, semi_euler)
-  @unpack acoustic_source_terms, coupled_element_ids = semi.cache
-  @unpack vorticity_mean = euler_acoustics_coupling.mean_values
-
-  @trixi_timeit timer() "calc acoustic source terms" calc_acoustic_sources!(
-    acoustic_source_terms, u_euler, u_acoustics, vorticity_mean, coupled_element_ids,
-    mesh_equations_solver_cache(semi_euler)...)
+    # Calculate acoustic sources based on linearized lamb vector
+    semi = integrator_acoustics.p
+    semi_euler = integrator_euler.p
+    u_acoustics = wrap_array(integrator_acoustics.u, semi)
+    u_euler = wrap_array(integrator_euler.u, semi_euler)
+    @unpack acoustic_source_terms, coupled_element_ids = semi.cache
+    @unpack vorticity_mean = euler_acoustics_coupling.mean_values
+
+    @trixi_timeit timer() "calc acoustic source terms" begin
+        calc_acoustic_sources!(acoustic_source_terms, u_euler, u_acoustics,
+                               vorticity_mean, coupled_element_ids,
+                               mesh_equations_solver_cache(semi_euler)...)
+    end

-  # avoid re-evaluation possible FSAL stages
-  u_modified!(integrator_acoustics, false)
-  u_modified!(integrator_euler, false)
+    # avoid re-evaluating possible FSAL stages
+    u_modified!(integrator_acoustics, false)
+    u_modified!(integrator_euler, false)

-  return nothing
+    return nothing
end

include("euler_acoustics_coupling_dg2d.jl")
-
end # @muladd
diff --git a/src/callbacks_step/euler_acoustics_coupling_dg2d.jl b/src/callbacks_step/euler_acoustics_coupling_dg2d.jl
index 0891515038c..16fac4f2d8d 100644
--- a/src/callbacks_step/euler_acoustics_coupling_dg2d.jl
+++ b/src/callbacks_step/euler_acoustics_coupling_dg2d.jl
@@ -3,38 +3,42 @@
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
+#! 
format: noindent - -function calc_acoustic_sources!(acoustic_source_terms, u_euler, u_acoustics, vorticity_mean, +function calc_acoustic_sources!(acoustic_source_terms, u_euler, u_acoustics, + vorticity_mean, coupled_element_ids, mesh, - equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - - acoustic_source_terms .= zero(eltype(acoustic_source_terms)) - - @threaded for k in 1:length(coupled_element_ids) - element = coupled_element_ids[k] - - for j in eachnode(dg), i in eachnode(dg) - vorticity = calc_vorticity_node(u_euler, mesh, equations, dg, cache, i, j, element) - - prim_euler = cons2prim(get_node_vars(u_euler, equations, dg, i, j, element), equations) - v1 = prim_euler[2] - v2 = prim_euler[3] - v1_mean = u_acoustics[4, i, j, element] - v2_mean = u_acoustics[5, i, j, element] - - vorticity_prime = vorticity - vorticity_mean[i, j, element] - v1_prime = v1 - v1_mean - v2_prime = v2 - v2_mean - - acoustic_source_terms[1, i, j, k] -= -vorticity_prime * v2_mean - - vorticity_mean[i, j, element] * v2_prime - acoustic_source_terms[2, i, j, k] -= vorticity_prime * v1_mean + - vorticity_mean[i, j, element] * v1_prime + equations::AbstractCompressibleEulerEquations{2}, + dg::DGSEM, cache) + acoustic_source_terms .= zero(eltype(acoustic_source_terms)) + + @threaded for k in 1:length(coupled_element_ids) + element = coupled_element_ids[k] + + for j in eachnode(dg), i in eachnode(dg) + vorticity = calc_vorticity_node(u_euler, mesh, equations, dg, cache, i, j, + element) + + prim_euler = cons2prim(get_node_vars(u_euler, equations, dg, i, j, element), + equations) + v1 = prim_euler[2] + v2 = prim_euler[3] + v1_mean = u_acoustics[4, i, j, element] + v2_mean = u_acoustics[5, i, j, element] + + vorticity_prime = vorticity - vorticity_mean[i, j, element] + v1_prime = v1 - v1_mean + v2_prime = v2 - v2_mean + + acoustic_source_terms[1, i, j, k] -= -vorticity_prime * v2_mean - + vorticity_mean[i, j, element] * + v2_prime + acoustic_source_terms[2, i, j, k] -= vorticity_prime * v1_mean + + vorticity_mean[i, j, element] * + v1_prime + end end - end - return nothing + return nothing end - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/callbacks_step/glm_speed.jl b/src/callbacks_step/glm_speed.jl index 03809c97e83..036f61a522b 100644 --- a/src/callbacks_step/glm_speed.jl +++ b/src/callbacks_step/glm_speed.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ GlmSpeedCallback(; glm_scale=0.5, cfl) @@ -15,82 +15,74 @@ The `cfl` number should be set to the same value as for the time step size calcu solution and should thus be set to a value within the interval [0,1]. Note that `glm_scale = 0` deactivates the divergence cleaning. """ -struct GlmSpeedCallback{RealT<:Real} - glm_scale::RealT - cfl::RealT +struct GlmSpeedCallback{RealT <: Real} + glm_scale::RealT + cfl::RealT end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:GlmSpeedCallback}) - @nospecialize cb # reduce precompilation time - - glm_speed_callback = cb.affect! - @unpack glm_scale, cfl = glm_speed_callback - print(io, "GlmSpeedCallback(glm_scale=", glm_scale, ", cfl=", cfl, ")") -end + @nospecialize cb # reduce precompilation time - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:GlmSpeedCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else glm_speed_callback = cb.affect! 
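
[Editor's note, not part of the patch] The per-node update in `calc_acoustic_sources!` above subtracts the components of the perturbed Lamb vector, built from the vorticity perturbation ω' = ω - ω̄ and the velocity perturbations v' = v - v̄. A toy scalar version of that arithmetic, with illustrative inputs:

# Same arithmetic as one node of calc_acoustic_sources!, extracted for clarity.
function acoustic_source_node(vorticity, vorticity_mean, v1, v2, v1_mean, v2_mean)
    vorticity_prime = vorticity - vorticity_mean
    v1_prime = v1 - v1_mean
    v2_prime = v2 - v2_mean
    # the sources accumulate the negative perturbed Lamb vector components
    s1 = -(-vorticity_prime * v2_mean - vorticity_mean * v2_prime)
    s2 = -(vorticity_prime * v1_mean + vorticity_mean * v1_prime)
    return s1, s2
end

acoustic_source_node(1.2, 1.0, 0.4, 0.1, 0.3, 0.0)  # ≈ (0.1, -0.16)
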
- - setup = [ - "GLM wave speed scaling" => glm_speed_callback.glm_scale, - "Expected CFL number" => glm_speed_callback.cfl, - ] - summary_box(io, "GlmSpeedCallback", setup) - end + @unpack glm_scale, cfl = glm_speed_callback + print(io, "GlmSpeedCallback(glm_scale=", glm_scale, ", cfl=", cfl, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:GlmSpeedCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + glm_speed_callback = cb.affect! + + setup = [ + "GLM wave speed scaling" => glm_speed_callback.glm_scale, + "Expected CFL number" => glm_speed_callback.cfl, + ] + summary_box(io, "GlmSpeedCallback", setup) + end +end -function GlmSpeedCallback(; glm_scale=0.5, cfl) - - @assert 0 <= glm_scale <= 1 "glm_scale must be between 0 and 1" +function GlmSpeedCallback(; glm_scale = 0.5, cfl) + @assert 0<=glm_scale<=1 "glm_scale must be between 0 and 1" - glm_speed_callback = GlmSpeedCallback(glm_scale, cfl) + glm_speed_callback = GlmSpeedCallback(glm_scale, cfl) - DiscreteCallback(glm_speed_callback, glm_speed_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + DiscreteCallback(glm_speed_callback, glm_speed_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end - -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:GlmSpeedCallback} - cb.affect!(integrator) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: GlmSpeedCallback} + cb.affect!(integrator) end - # this method is called to determine whether the callback should be activated function (glm_speed_callback::GlmSpeedCallback)(u, t, integrator) - return true + return true end - # This method is called as callback after the StepsizeCallback during the time integration. @inline function (glm_speed_callback::GlmSpeedCallback)(integrator) + dt = get_proposed_dt(integrator) + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack glm_scale, cfl = glm_speed_callback - dt = get_proposed_dt(integrator) - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack glm_scale, cfl = glm_speed_callback + # compute time step for GLM linear advection equation with c_h=1 (redone due to the possible AMR) + c_h_deltat = calc_dt_for_cleaning_speed(cfl, mesh, equations, solver, cache) - # compute time step for GLM linear advection equation with c_h=1 (redone due to the possible AMR) - c_h_deltat = calc_dt_for_cleaning_speed(cfl, mesh, equations, solver, cache) + # c_h is proportional to its own time step divided by the complete MHD time step + equations.c_h = glm_scale * c_h_deltat / dt - # c_h is proportional to its own time step divided by the complete MHD time step - equations.c_h = glm_scale * c_h_deltat / dt + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - - return nothing + return nothing end include("glm_speed_dg.jl") - - end # @muladd diff --git a/src/callbacks_step/glm_speed_dg.jl b/src/callbacks_step/glm_speed_dg.jl index eef01ed0471..0686c547a34 100644 --- a/src/callbacks_step/glm_speed_dg.jl +++ b/src/callbacks_step/glm_speed_dg.jl @@ -3,35 +3,38 @@ # we need to opt-in explicitly. 
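
[Editor's note, not part of the patch] The `affect!` above rescales the divergence-cleaning speed so that the GLM subsystem advances at most as fast as its own stable time step allows. A numerical sketch with made-up mesh quantities; the DGSEM formula itself follows in `calc_dt_for_cleaning_speed` below:

glm_scale = 0.5
cfl = 0.9
nnodes_dg = 4                          # polydeg + 1
max_scaled_speed_for_c_h = 16.0        # maximum(inverse_jacobian) * ndims(equations)

# time step a GLM advection equation with c_h = 1 could take (TreeMesh/DGSEM case)
c_h_deltat = cfl * 2 / (nnodes_dg * max_scaled_speed_for_c_h)

dt = 1.0e-3                            # time step proposed for the full MHD system
c_h = glm_scale * c_h_deltat / dt      # ≈ 14.06 here; stored as equations.c_h
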
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function calc_dt_for_cleaning_speed(cfl::Real, mesh, - equations::Union{AbstractIdealGlmMhdEquations, AbstractIdealGlmMhdMulticomponentEquations}, dg::DG, cache) -# compute time step for GLM linear advection equation with c_h=1 for the DG discretization on -# Cartesian meshes - max_scaled_speed_for_c_h = maximum(cache.elements.inverse_jacobian) * ndims(equations) - # OBS! This depends on the implementation details of the StepsizeCallback and needs to be adapted - # as well if that callback changes. - return cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h) + equations::Union{AbstractIdealGlmMhdEquations, + AbstractIdealGlmMhdMulticomponentEquations + }, dg::DG, cache) + # compute time step for GLM linear advection equation with c_h=1 for the DG discretization on + # Cartesian meshes + max_scaled_speed_for_c_h = maximum(cache.elements.inverse_jacobian) * + ndims(equations) + # OBS! This depends on the implementation details of the StepsizeCallback and needs to be adapted + # as well if that callback changes. + return cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h) end function calc_dt_for_cleaning_speed(cfl::Real, mesh, - equations::Union{AbstractIdealGlmMhdEquations, AbstractIdealGlmMhdMulticomponentEquations}, + equations::Union{AbstractIdealGlmMhdEquations, + AbstractIdealGlmMhdMulticomponentEquations + }, dg::DGMulti, cache) - rd = dg.basis - md = mesh.md + rd = dg.basis + md = mesh.md - # Compute time step for GLM linear advection equation with c_h=1 for a DGMulti discretization. - # Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations. - max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations) + # Compute time step for GLM linear advection equation with c_h=1 for a DGMulti discretization. + # Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations. + max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations) - # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by - # `polydeg+1`. This is because `nnodes(dg)` returns the total number of - # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns - # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return cfl * 2 / ((polydeg + 1) * max_scaled_speed_for_c_h) + # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by + # `polydeg+1`. This is because `nnodes(dg)` returns the total number of + # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns + # the number of 1D nodes for `DGSEM` solvers. + polydeg = rd.N + return cfl * 2 / ((polydeg + 1) * max_scaled_speed_for_c_h) end - - end # @muladd diff --git a/src/callbacks_step/lbm_collision.jl b/src/callbacks_step/lbm_collision.jl index 7bd11830c63..33c2806d6a6 100644 --- a/src/callbacks_step/lbm_collision.jl +++ b/src/callbacks_step/lbm_collision.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ LBMCollisionCallback() @@ -12,57 +12,55 @@ Apply the Lattice-Boltzmann method (LBM) collision operator before each time ste See [`LatticeBoltzmannEquations2D`](@ref) for further details. """ function LBMCollisionCallback() - DiscreteCallback(lbm_collision_callback, lbm_collision_callback, - save_positions=(false,false), - initialize=initialize!) 
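
[Editor's note, not part of the patch] A hypothetical elixir fragment showing how the collision callback is attached; `ode` and `stepsize_callback` are assumed to be set up as usual, and the time integrator choice is illustrative:

using OrdinaryDiffEq  # provides solve and CarpenterKennedy2N54

# The collision operator then runs after every accepted time step
# (but not after the final one, see `lbm_collision_callback` below).
callbacks = CallbackSet(stepsize_callback, LBMCollisionCallback())
sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
            dt = 1.0, save_everystep = false, callback = callbacks)
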
+ DiscreteCallback(lbm_collision_callback, lbm_collision_callback, + save_positions = (false, false), + initialize = initialize!) end # Always execute collision step after a time step, but not after the last step lbm_collision_callback(u, t, integrator) = !isfinished(integrator) +function Base.show(io::IO, + cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) + @nospecialize cb # reduce precompilation time -function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) - @nospecialize cb # reduce precompilation time - - print(io, "LBMCollisionCallback()") + print(io, "LBMCollisionCallback()") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) + @nospecialize cb # reduce precompilation time -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - summary_box(io, "LBMCollisionCallback") - end + if get(io, :compact, false) + show(io, cb) + else + summary_box(io, "LBMCollisionCallback") + end end - # Execute collision step once in the very beginning -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:typeof(lbm_collision_callback)} - cb.affect!(integrator) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, + Affect! <: typeof(lbm_collision_callback)} + cb.affect!(integrator) end - # This method is called as callback after the StepsizeCallback during the time integration. @inline function lbm_collision_callback(integrator) + dt = get_proposed_dt(integrator) + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack collision_op = equations - dt = get_proposed_dt(integrator) - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack collision_op = equations - - u_ode = integrator.u - u = wrap_array(u_ode, mesh, equations, solver, cache) + u_ode = integrator.u + u = wrap_array(u_ode, mesh, equations, solver, cache) - @trixi_timeit timer() "LBM collision" apply_collision!(u, dt, collision_op, mesh, equations, solver, cache) + @trixi_timeit timer() "LBM collision" apply_collision!(u, dt, collision_op, mesh, + equations, solver, cache) - return nothing + return nothing end include("lbm_collision_dg2d.jl") include("lbm_collision_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/lbm_collision_dg2d.jl b/src/callbacks_step/lbm_collision_dg2d.jl index 3a6cdaddac1..932edfd61f6 100644 --- a/src/callbacks_step/lbm_collision_dg2d.jl +++ b/src/callbacks_step/lbm_collision_dg2d.jl @@ -3,21 +3,18 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function apply_collision!(u, dt, collision_op, mesh::AbstractMesh{2}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - update = collision_op(u_node, dt, equations) - add_to_node_vars!(u, update, equations, dg, i, j, element) + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + update = collision_op(u_node, dt, equations) + add_to_node_vars!(u, update, equations, dg, i, j, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_step/lbm_collision_dg3d.jl b/src/callbacks_step/lbm_collision_dg3d.jl index 4c1326b3608..0620f77159d 100644 --- a/src/callbacks_step/lbm_collision_dg3d.jl +++ b/src/callbacks_step/lbm_collision_dg3d.jl @@ -3,21 +3,18 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function apply_collision!(u, dt, collision_op, mesh::AbstractMesh{3}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - update = collision_op(u_node, dt, equations) - add_to_node_vars!(u, update, equations, dg, i, j, k, element) + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + update = collision_op(u_node, dt, equations) + add_to_node_vars!(u, update, equations, dg, i, j, k, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_step/save_restart.jl b/src/callbacks_step/save_restart.jl index 4597c3ce920..e23f58f26ea 100644 --- a/src/callbacks_step/save_restart.jl +++ b/src/callbacks_step/save_restart.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SaveRestartCallback(; interval=0, @@ -13,132 +13,126 @@ Save the current numerical solution in a restart file every `interval` time steps. """ mutable struct SaveRestartCallback - interval::Int - save_final_restart::Bool - output_directory::String + interval::Int + save_final_restart::Bool + output_directory::String end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SaveRestartCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - restart_callback = cb.affect! - print(io, "SaveRestartCallback(interval=", restart_callback.interval, ")") + restart_callback = cb.affect! + print(io, "SaveRestartCallback(interval=", restart_callback.interval, ")") end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:SaveRestartCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - save_restart_callback = cb.affect! - - setup = [ - "interval" => save_restart_callback.interval, - "save final solution" => save_restart_callback.save_final_restart ? 
"yes" : "no", - "output directory" => abspath(normpath(save_restart_callback.output_directory)), - ] - summary_box(io, "SaveRestartCallback", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SaveRestartCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + save_restart_callback = cb.affect! + + setup = [ + "interval" => save_restart_callback.interval, + "save final solution" => save_restart_callback.save_final_restart ? "yes" : + "no", + "output directory" => abspath(normpath(save_restart_callback.output_directory)), + ] + summary_box(io, "SaveRestartCallback", setup) + end end +function SaveRestartCallback(; interval = 0, + save_final_restart = true, + output_directory = "out") + restart_callback = SaveRestartCallback(interval, save_final_restart, + output_directory) -function SaveRestartCallback(; interval=0, - save_final_restart=true, - output_directory="out") - - restart_callback = SaveRestartCallback(interval, save_final_restart, - output_directory) - - DiscreteCallback(restart_callback, restart_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + DiscreteCallback(restart_callback, restart_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: SaveRestartCallback} + restart_callback = cb.affect! -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:SaveRestartCallback} - restart_callback = cb.affect! - - mpi_isroot() && mkpath(restart_callback.output_directory) + mpi_isroot() && mkpath(restart_callback.output_directory) - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - @trixi_timeit timer() "I/O" begin - if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, restart_callback.output_directory) - mesh.unsaved_changes = false + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + @trixi_timeit timer() "I/O" begin + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + restart_callback.output_directory) + mesh.unsaved_changes = false + end end - end - return nothing + return nothing end - # this method is called to determine whether the callback should be activated function (restart_callback::SaveRestartCallback)(u, t, integrator) - @unpack interval, save_final_restart = restart_callback - - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return interval > 0 && ( - ((integrator.stats.naccept % interval == 0) && !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - (save_final_restart && isfinished(integrator))) + @unpack interval, save_final_restart = restart_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. 
+ return interval > 0 && (((integrator.stats.naccept % interval == 0) && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + (save_final_restart && isfinished(integrator))) end - # this method is called when the callback is activated function (restart_callback::SaveRestartCallback)(integrator) - u_ode = integrator.u - @unpack t, dt = integrator - iter = integrator.stats.naccept - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - - @trixi_timeit timer() "I/O" begin - if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, restart_callback.output_directory, iter) - mesh.unsaved_changes = false + u_ode = integrator.u + @unpack t, dt = integrator + iter = integrator.stats.naccept + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + + @trixi_timeit timer() "I/O" begin + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + restart_callback.output_directory, + iter) + mesh.unsaved_changes = false + end + + save_restart_file(u_ode, t, dt, iter, semi, restart_callback) end - save_restart_file(u_ode, t, dt, iter, semi, restart_callback) - end - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - @inline function save_restart_file(u_ode, t, dt, iter, semi::AbstractSemidiscretization, restart_callback) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array_native(u_ode, mesh, equations, solver, cache) - save_restart_file(u, t, dt, iter, mesh, equations, solver, cache, restart_callback) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array_native(u_ode, mesh, equations, solver, cache) + save_restart_file(u, t, dt, iter, mesh, equations, solver, cache, restart_callback) end - """ load_time(restart_file::AbstractString) Load the time saved in a `restart_file`. """ function load_time(restart_file::AbstractString) - h5open(restart_file, "r") do file - read(attributes(file)["time"]) - end + h5open(restart_file, "r") do file + read(attributes(file)["time"]) + end end - function load_restart_file(semi::AbstractSemidiscretization, restart_file) - load_restart_file(mesh_equations_solver_cache(semi)..., restart_file) + load_restart_file(mesh_equations_solver_cache(semi)..., restart_file) end - include("save_restart_dg.jl") - - end # @muladd diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl index a46a8bc856b..5695eb8bede 100644 --- a/src/callbacks_step/save_restart_dg.jl +++ b/src/callbacks_step/save_restart_dg.jl @@ -3,324 +3,327 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function save_restart_file(u, time, dt, timestep, - mesh::Union{SerialTreeMesh, StructuredMesh, UnstructuredMesh2D, SerialP4estMesh}, + mesh::Union{SerialTreeMesh, StructuredMesh, + UnstructuredMesh2D, SerialP4estMesh}, equations, dg::DG, cache, restart_callback) - - @unpack output_directory = restart_callback - - # Filename based on current time step - filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) - - # Restart files always store conservative variables - data = u - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = nvariables(equations) - attributes(file)["n_elements"] = nelements(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution - for v in eachvariable(equations) - # Convert to 1D array - file["variables_$v"] = vec(data[v, .., :]) - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(cons2cons, equations)[v] + @unpack output_directory = restart_callback + + # Filename based on current time step + filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) + + # Restart files always store conservative variables + data = u + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = nvariables(equations) + attributes(file)["n_elements"] = nelements(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution + for v in eachvariable(equations) + # Convert to 1D array + file["variables_$v"] = vec(data[v, .., :]) + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(cons2cons, equations)[v] + end end - end - return filename + return filename end - -function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh, UnstructuredMesh2D, SerialP4estMesh}, +function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh, + UnstructuredMesh2D, SerialP4estMesh}, equations, dg::DG, cache, restart_file) - # allocate memory - u_ode = allocate_coefficients(mesh, equations, dg, cache) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - h5open(restart_file, "r") do file - # Read attributes to perform some sanity checks - if read(attributes(file)["ndims"]) != ndims(mesh) - error("restart mismatch: ndims differs from value in restart file") - end - if read(attributes(file)["equations"]) != get_name(equations) - error("restart 
mismatch: equations differ from value in restart file") - end - if read(attributes(file)["polydeg"]) != polydeg(dg) - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end - if read(attributes(file)["n_elements"]) != nelements(dg, cache) - error("restart mismatch: number of elements in solver differs from value in restart file") - end - - # Read data - for v in eachvariable(equations) - # Check if variable name matches - var = file["variables_$v"] - if (name = read(attributes(var)["name"])) != varnames(cons2cons, equations)[v] - error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") - end - - # Read variable - u[v, .., :] = read(file["variables_$v"]) + # allocate memory + u_ode = allocate_coefficients(mesh, equations, dg, cache) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + h5open(restart_file, "r") do file + # Read attributes to perform some sanity checks + if read(attributes(file)["ndims"]) != ndims(mesh) + error("restart mismatch: ndims differs from value in restart file") + end + if read(attributes(file)["equations"]) != get_name(equations) + error("restart mismatch: equations differ from value in restart file") + end + if read(attributes(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attributes(file)["n_elements"]) != nelements(dg, cache) + error("restart mismatch: number of elements in solver differs from value in restart file") + end + + # Read data + for v in eachvariable(equations) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attributes(var)["name"])) != + varnames(cons2cons, equations)[v] + error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") + end + + # Read variable + u[v, .., :] = read(file["variables_$v"]) + end end - end - return u_ode + return u_ode end - function save_restart_file(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, + dg::DG, cache, restart_callback) + @unpack output_directory = restart_callback + # Filename based on current time step + filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) - @unpack output_directory = restart_callback - # Filename based on current time step - filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) - - if HDF5.has_parallel() - save_restart_file_parallel(u, time, dt, timestep, mesh, equations, dg, cache, filename) - else - save_restart_file_on_root(u, time, dt, timestep, mesh, equations, dg, cache, filename) - end + if HDF5.has_parallel() + save_restart_file_parallel(u, time, dt, timestep, mesh, equations, dg, cache, + filename) + else + save_restart_file_on_root(u, time, dt, timestep, mesh, equations, dg, cache, + filename) + end end - function save_restart_file_parallel(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, filename) - # Restart files always store conservative variables - data = u - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - # Cumulative sum of nodes per rank starting 
with an additional 0 - cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) - - # Open file (clobber existing content) - h5open(filename, "w", mpi_comm()) do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = nvariables(equations) - attributes(file)["n_elements"] = nelementsglobal(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution - for v in eachvariable(equations) - # Need to create dataset explicitly in parallel case - var = create_dataset(file, "/variables_$v", datatype(eltype(data)), dataspace((ndofsglobal(mesh, dg, cache),))) - # Write data of each process in slices (ranks start with 0) - slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] - # Convert to 1D array - var[slice] = vec(data[v, .., :]) - # Add variable name as attribute - attributes(var)["name"] = varnames(cons2cons, equations)[v] + # Restart files always store conservative variables + data = u + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # Open file (clobber existing content) + h5open(filename, "w", mpi_comm()) do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = nvariables(equations) + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution + for v in eachvariable(equations) + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/variables_$v", datatype(eltype(data)), + dataspace((ndofsglobal(mesh, dg, cache),))) + # Write data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert to 1D array + var[slice] = vec(data[v, .., :]) + # Add variable name as attribute + attributes(var)["name"] = varnames(cons2cons, equations)[v] + end end - end - return filename + return filename end - function save_restart_file_on_root(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, filename) - # Restart files always store conservative variables - data = u + # Restart files always store conservative 
variables + data = u - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) - # non-root ranks only send data - if !mpi_isroot() - # Send nodal data to root - for v in eachvariable(equations) - MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) - end + # non-root ranks only send data + if !mpi_isroot() + # Send nodal data to root + for v in eachvariable(equations) + MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) + end - return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = nvariables(equations) - attributes(file)["n_elements"] = nelements(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution - for v in eachvariable(equations) - # Convert to 1D array - recv = Vector{eltype(data)}(undef, sum(node_counts)) - MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), mpi_root(), mpi_comm()) - file["variables_$v"] = recv - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(cons2cons, equations)[v] + return filename end - end - - return filename -end - -function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = nvariables(equations) + attributes(file)["n_elements"] = nelements(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution + for v in eachvariable(equations) + # Convert to 1D array + recv = Vector{eltype(data)}(undef, sum(node_counts)) + MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), + mpi_root(), mpi_comm()) + file["variables_$v"] = recv + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(cons2cons, equations)[v] + end + end - if HDF5.has_parallel() - load_restart_file_parallel(mesh, equations, dg, cache, restart_file) - else - load_restart_file_on_root(mesh, equations, dg, cache, restart_file) - end + 
return filename end - -function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - # Cumulative sum of nodes per rank starting with an additional 0 - cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) - - # allocate memory - u_ode = allocate_coefficients(mesh, equations, dg, cache) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - # read in parallel - h5open(restart_file, "r", mpi_comm()) do file - # Read attributes to perform some sanity checks - if read(attributes(file)["ndims"]) != ndims(mesh) - error("restart mismatch: ndims differs from value in restart file") - end - if read(attributes(file)["equations"]) != get_name(equations) - error("restart mismatch: equations differ from value in restart file") - end - if read(attributes(file)["polydeg"]) != polydeg(dg) - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end - if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache) - error("restart mismatch: number of elements in solver differs from value in restart file") - end - - # Read data - for v in eachvariable(equations) - # Check if variable name matches - var = file["variables_$v"] - if (name = read(attributes(var)["name"])) != varnames(cons2cons, equations)[v] - error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") - end - - # Read variable - mpi_println("Reading variables_$v ($name)...") - # Read data of each process in slices (ranks start with 0) - slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] - # Convert 1D array back to actual size of `u` - u[v, .., :] = reshape(read(var)[slice], size(@view u[v, .., :])) +function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, + dg::DG, cache, restart_file) + if HDF5.has_parallel() + load_restart_file_parallel(mesh, equations, dg, cache, restart_file) + else + load_restart_file_on_root(mesh, equations, dg, cache, restart_file) end - end - - return u_ode end - -function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - - # allocate memory - u_ode = allocate_coefficients(mesh, equations, dg, cache) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - # non-root ranks only receive data - if !mpi_isroot() - # Receive nodal data from root - for v in eachvariable(equations) - # put Scatterv in both blocks of the if condition to avoid type instability - if isempty(u) - data = eltype(u)[] - MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) - else - data = @view u[v, .., :] - MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) - end +function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, restart_file) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = 
element_counts * Cint(element_size) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # allocate memory + u_ode = allocate_coefficients(mesh, equations, dg, cache) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + # read in parallel + h5open(restart_file, "r", mpi_comm()) do file + # Read attributes to perform some sanity checks + if read(attributes(file)["ndims"]) != ndims(mesh) + error("restart mismatch: ndims differs from value in restart file") + end + if read(attributes(file)["equations"]) != get_name(equations) + error("restart mismatch: equations differ from value in restart file") + end + if read(attributes(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache) + error("restart mismatch: number of elements in solver differs from value in restart file") + end + + # Read data + for v in eachvariable(equations) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attributes(var)["name"])) != + varnames(cons2cons, equations)[v] + error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") + end + + # Read variable + mpi_println("Reading variables_$v ($name)...") + # Read data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert 1D array back to actual size of `u` + u[v, .., :] = reshape(read(var)[slice], size(@view u[v, .., :])) + end end return u_ode - end +end - # read only on MPI root - h5open(restart_file, "r") do file - # Read attributes to perform some sanity checks - if read(attributes(file)["ndims"]) != ndims(mesh) - error("restart mismatch: ndims differs from value in restart file") - end - if read(attributes(file)["equations"]) != get_name(equations) - error("restart mismatch: equations differ from value in restart file") - end - if read(attributes(file)["polydeg"]) != polydeg(dg) - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end - if read(attributes(file)["n_elements"]) != nelements(dg, cache) - error("restart mismatch: number of elements in solver differs from value in restart file") +function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, restart_file) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + + # allocate memory + u_ode = allocate_coefficients(mesh, equations, dg, cache) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + # non-root ranks only receive data + if !mpi_isroot() + # Receive nodal data from root + for v in eachvariable(equations) + # put Scatterv in both blocks of the if condition to avoid type instability + if isempty(u) + data = eltype(u)[] + MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) + else + data = @view u[v, .., :] + MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) + end + end + + return u_ode end - # Read data - for v in eachvariable(equations) - # Check if variable name matches - var = file["variables_$v"] - if (name = read(attributes(var)["name"])) != varnames(cons2cons, equations)[v] - error("mismatch: variables_$v should be 
'$(varnames(cons2cons, equations)[v])', but found '$name'") - end - - # Read variable - println("Reading variables_$v ($name)...") - sendbuf = MPI.VBuffer(read(file["variables_$v"]), node_counts) - MPI.Scatterv!(sendbuf, @view(u[v, .., :]), mpi_root(), mpi_comm()) + # read only on MPI root + h5open(restart_file, "r") do file + # Read attributes to perform some sanity checks + if read(attributes(file)["ndims"]) != ndims(mesh) + error("restart mismatch: ndims differs from value in restart file") + end + if read(attributes(file)["equations"]) != get_name(equations) + error("restart mismatch: equations differ from value in restart file") + end + if read(attributes(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attributes(file)["n_elements"]) != nelements(dg, cache) + error("restart mismatch: number of elements in solver differs from value in restart file") + end + + # Read data + for v in eachvariable(equations) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attributes(var)["name"])) != + varnames(cons2cons, equations)[v] + error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") + end + + # Read variable + println("Reading variables_$v ($name)...") + sendbuf = MPI.VBuffer(read(file["variables_$v"]), node_counts) + MPI.Scatterv!(sendbuf, @view(u[v, .., :]), mpi_root(), mpi_comm()) + end end - end - return u_ode + return u_ode end - - end # @muladd diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index 6cccbc9d3f9..55f17bbc1c7 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SaveSolutionCallback(; interval::Integer=0, @@ -22,200 +22,211 @@ to `solution_variables` will be the set of conservative variables and the second parameter is the equation struct. """ mutable struct SaveSolutionCallback{IntervalType, SolutionVariablesType} - interval_or_dt::IntervalType - save_initial_solution::Bool - save_final_solution::Bool - output_directory::String - solution_variables::SolutionVariablesType + interval_or_dt::IntervalType + save_initial_solution::Bool + save_final_solution::Bool + output_directory::String + solution_variables::SolutionVariablesType end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SaveSolutionCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - save_solution_callback = cb.affect! - print(io, "SaveSolutionCallback(interval=", save_solution_callback.interval_or_dt, ")") + save_solution_callback = cb.affect! + print(io, "SaveSolutionCallback(interval=", save_solution_callback.interval_or_dt, + ")") end function Base.show(io::IO, - cb::DiscreteCallback{<:Any, <:PeriodicCallbackAffect{<:SaveSolutionCallback}}) - @nospecialize cb # reduce precompilation time + cb::DiscreteCallback{<:Any, + <:PeriodicCallbackAffect{<:SaveSolutionCallback + }}) + @nospecialize cb # reduce precompilation time - save_solution_callback = cb.affect!.affect! - print(io, "SaveSolutionCallback(dt=", save_solution_callback.interval_or_dt, ")") + save_solution_callback = cb.affect!.affect! 
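+    # Note: a `PeriodicCallback` wraps the `SaveSolutionCallback` in a
+    # `PeriodicCallbackAffect`, hence the double `.affect!` above.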
+ print(io, "SaveSolutionCallback(dt=", save_solution_callback.interval_or_dt, ")") end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:SaveSolutionCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - save_solution_callback = cb.affect! - - setup = [ - "interval" => save_solution_callback.interval_or_dt, - "solution variables" => save_solution_callback.solution_variables, - "save initial solution" => save_solution_callback.save_initial_solution ? "yes" : "no", - "save final solution" => save_solution_callback.save_final_solution ? "yes" : "no", - "output directory" => abspath(normpath(save_solution_callback.output_directory)), - ] - summary_box(io, "SaveSolutionCallback", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SaveSolutionCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + save_solution_callback = cb.affect! + + setup = [ + "interval" => save_solution_callback.interval_or_dt, + "solution variables" => save_solution_callback.solution_variables, + "save initial solution" => save_solution_callback.save_initial_solution ? + "yes" : "no", + "save final solution" => save_solution_callback.save_final_solution ? + "yes" : "no", + "output directory" => abspath(normpath(save_solution_callback.output_directory)), + ] + summary_box(io, "SaveSolutionCallback", setup) + end end function Base.show(io::IO, ::MIME"text/plain", - cb::DiscreteCallback{<:Any, <:PeriodicCallbackAffect{<:SaveSolutionCallback}}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - save_solution_callback = cb.affect!.affect! - - setup = [ - "dt" => save_solution_callback.interval_or_dt, - "solution variables" => save_solution_callback.solution_variables, - "save initial solution" => save_solution_callback.save_initial_solution ? "yes" : "no", - "save final solution" => save_solution_callback.save_final_solution ? "yes" : "no", - "output directory" => abspath(normpath(save_solution_callback.output_directory)), - ] - summary_box(io, "SaveSolutionCallback", setup) - end + cb::DiscreteCallback{<:Any, + <:PeriodicCallbackAffect{<:SaveSolutionCallback + }}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + save_solution_callback = cb.affect!.affect! + + setup = [ + "dt" => save_solution_callback.interval_or_dt, + "solution variables" => save_solution_callback.solution_variables, + "save initial solution" => save_solution_callback.save_initial_solution ? + "yes" : "no", + "save final solution" => save_solution_callback.save_final_solution ? 
+ "yes" : "no", + "output directory" => abspath(normpath(save_solution_callback.output_directory)), + ] + summary_box(io, "SaveSolutionCallback", setup) + end end +function SaveSolutionCallback(; interval::Integer = 0, + dt = nothing, + save_initial_solution = true, + save_final_solution = true, + output_directory = "out", + solution_variables = cons2prim) + if !isnothing(dt) && interval > 0 + throw(ArgumentError("You can either set the number of steps between output (using `interval`) or the time between outputs (using `dt`) but not both simultaneously")) + end -function SaveSolutionCallback(; interval::Integer=0, - dt=nothing, - save_initial_solution=true, - save_final_solution=true, - output_directory="out", - solution_variables=cons2prim) - - if !isnothing(dt) && interval > 0 - throw(ArgumentError("You can either set the number of steps between output (using `interval`) or the time between outputs (using `dt`) but not both simultaneously")) - end - - # Expected most frequent behavior comes first - if isnothing(dt) - interval_or_dt = interval - else # !isnothing(dt) - interval_or_dt = dt - end - - solution_callback = SaveSolutionCallback(interval_or_dt, - save_initial_solution, save_final_solution, - output_directory, solution_variables) - - # Expected most frequent behavior comes first - if isnothing(dt) - # Save every `interval` (accepted) time steps - # The first one is the condition, the second the affect! - return DiscreteCallback(solution_callback, solution_callback, - save_positions=(false,false), - initialize=initialize_save_cb!) - else - # Add a `tstop` every `dt`, and save the final solution. - return PeriodicCallback(solution_callback, dt, - save_positions=(false, false), - initialize=initialize_save_cb!, - final_affect=save_final_solution) - end -end + # Expected most frequent behavior comes first + if isnothing(dt) + interval_or_dt = interval + else # !isnothing(dt) + interval_or_dt = dt + end + solution_callback = SaveSolutionCallback(interval_or_dt, + save_initial_solution, save_final_solution, + output_directory, solution_variables) + + # Expected most frequent behavior comes first + if isnothing(dt) + # Save every `interval` (accepted) time steps + # The first one is the condition, the second the affect! + return DiscreteCallback(solution_callback, solution_callback, + save_positions = (false, false), + initialize = initialize_save_cb!) + else + # Add a `tstop` every `dt`, and save the final solution. + return PeriodicCallback(solution_callback, dt, + save_positions = (false, false), + initialize = initialize_save_cb!, + final_affect = save_final_solution) + end +end function initialize_save_cb!(cb, u, t, integrator) - # The SaveSolutionCallback is either cb.affect! (with DiscreteCallback) - # or cb.affect!.affect! (with PeriodicCallback). - # Let recursive dispatch handle this. - initialize_save_cb!(cb.affect!, u, t, integrator) + # The SaveSolutionCallback is either cb.affect! (with DiscreteCallback) + # or cb.affect!.affect! (with PeriodicCallback). + # Let recursive dispatch handle this. 
+ initialize_save_cb!(cb.affect!, u, t, integrator) end function initialize_save_cb!(solution_callback::SaveSolutionCallback, u, t, integrator) - mpi_isroot() && mkpath(solution_callback.output_directory) - - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - @trixi_timeit timer() "I/O" begin - if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, solution_callback.output_directory) - mesh.unsaved_changes = false + mpi_isroot() && mkpath(solution_callback.output_directory) + + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + @trixi_timeit timer() "I/O" begin + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + solution_callback.output_directory) + mesh.unsaved_changes = false + end end - end - if solution_callback.save_initial_solution - solution_callback(integrator) - end + if solution_callback.save_initial_solution + solution_callback(integrator) + end - return nothing + return nothing end - # this method is called to determine whether the callback should be activated function (solution_callback::SaveSolutionCallback)(u, t, integrator) - @unpack interval_or_dt, save_final_solution = solution_callback - - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return interval_or_dt > 0 && ( - ((integrator.stats.naccept % interval_or_dt == 0) && !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - (save_final_solution && isfinished(integrator))) + @unpack interval_or_dt, save_final_solution = solution_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. 
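+    # Example: with `interval_or_dt = 10`, output is triggered after accepted
+    # steps 10, 20, 30, ... and, if `save_final_solution` is set, at the
+    # final time.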
+ return interval_or_dt > 0 && (((integrator.stats.naccept % interval_or_dt == 0) && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + (save_final_solution && isfinished(integrator))) end - # this method is called when the callback is activated function (solution_callback::SaveSolutionCallback)(integrator) - u_ode = integrator.u - @unpack t, dt = integrator - iter = integrator.stats.naccept - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - - @trixi_timeit timer() "I/O" begin - @trixi_timeit timer() "save mesh" if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, solution_callback.output_directory, iter) - mesh.unsaved_changes = false - end - - element_variables = Dict{Symbol, Any}() - @trixi_timeit timer() "get element variables" begin - get_element_variables!(element_variables, u_ode, semi) - callbacks = integrator.opts.callback - if callbacks isa CallbackSet - for cb in callbacks.continuous_callbacks - get_element_variables!(element_variables, u_ode, semi, cb; t=integrator.t, iter=integrator.stats.naccept) + u_ode = integrator.u + @unpack t, dt = integrator + iter = integrator.stats.naccept + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + + @trixi_timeit timer() "I/O" begin + @trixi_timeit timer() "save mesh" if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + solution_callback.output_directory, + iter) + mesh.unsaved_changes = false end - for cb in callbacks.discrete_callbacks - get_element_variables!(element_variables, u_ode, semi, cb; t=integrator.t, iter=integrator.stats.naccept) + + element_variables = Dict{Symbol, Any}() + @trixi_timeit timer() "get element variables" begin + get_element_variables!(element_variables, u_ode, semi) + callbacks = integrator.opts.callback + if callbacks isa CallbackSet + for cb in callbacks.continuous_callbacks + get_element_variables!(element_variables, u_ode, semi, cb; + t = integrator.t, + iter = integrator.stats.naccept) + end + for cb in callbacks.discrete_callbacks + get_element_variables!(element_variables, u_ode, semi, cb; + t = integrator.t, + iter = integrator.stats.naccept) + end + end end - end - end - @trixi_timeit timer() "save solution" save_solution_file(u_ode, t, dt, iter, semi, solution_callback, element_variables) - end + @trixi_timeit timer() "save solution" save_solution_file(u_ode, t, dt, iter, + semi, + solution_callback, + element_variables) + end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - @inline function save_solution_file(u_ode, t, dt, iter, semi::AbstractSemidiscretization, solution_callback, - element_variables=Dict{Symbol,Any}()) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array_native(u_ode, mesh, equations, solver, cache) - save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, solution_callback, element_variables) + element_variables = Dict{Symbol, Any}()) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array_native(u_ode, mesh, equations, solver, cache) + save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, + solution_callback, element_variables) end - # TODO: Taal refactor, move save_mesh_file? 
# function save_mesh_file(mesh::TreeMesh, output_directory, timestep=-1) in io/io.jl include("save_solution_dg.jl") - - end # @muladd diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl index 6d1cdf0151b..6cd4a0ec9c1 100644 --- a/src/callbacks_step/save_solution_dg.jl +++ b/src/callbacks_step/save_solution_dg.jl @@ -3,238 +3,253 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function save_solution_file(u, time, dt, timestep, - mesh::Union{SerialTreeMesh, StructuredMesh, UnstructuredMesh2D, SerialP4estMesh}, + mesh::Union{SerialTreeMesh, StructuredMesh, + UnstructuredMesh2D, SerialP4estMesh}, equations, dg::DG, cache, - solution_callback, element_variables=Dict{Symbol,Any}(); - system="") - - @unpack output_directory, solution_variables = solution_callback - - # Filename based on current time step - if isempty(system) - filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) - else - filename = joinpath(output_directory, @sprintf("solution_%s_%06d.h5", system, timestep)) - end - - # Convert to different set of variables if requested - if solution_variables === cons2cons - data = u - n_vars = nvariables(equations) - else - # Reinterpret the solution array as an array of conservative variables, - # compute the solution variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - data = Array(reinterpret(eltype(u), - solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), - Ref(equations)))) - - # Find out variable count by looking at output from `solution_variables` function - n_vars = size(data, 1) - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelements(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution data - for v in 1:n_vars - # Convert to 1D array - file["variables_$v"] = vec(data[v, .., :]) - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(solution_variables, equations)[v] + solution_callback, element_variables = Dict{Symbol, Any}(); + system = "") + @unpack output_directory, solution_variables = solution_callback + + # Filename based on current time step + if isempty(system) + filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) + else + filename = joinpath(output_directory, + @sprintf("solution_%s_%06d.h5", system, timestep)) end - # Store element variables - for (v, (key, element_variable)) in enumerate(element_variables) - # Add to file - file["element_variables_$v"] = element_variable + # Convert to different set of variables if requested + if solution_variables === cons2cons + data = u + n_vars = nvariables(equations) + else + # Reinterpret the solution array as an array of conservative variables, + # compute the solution 
variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(u), + solution_variables.(reinterpret(SVector{ + nvariables(equations), + eltype(u)}, u), + Ref(equations)))) + + # Find out variable count by looking at output from `solution_variables` function + n_vars = size(data, 1) + end - # Add variable name as attribute - var = file["element_variables_$v"] - attributes(var)["name"] = string(key) + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_vars + attributes(file)["n_elements"] = nelements(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution data + for v in 1:n_vars + # Convert to 1D array + file["variables_$v"] = vec(data[v, .., :]) + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(solution_variables, equations)[v] + end + + # Store element variables + for (v, (key, element_variable)) in enumerate(element_variables) + # Add to file + file["element_variables_$v"] = element_variable + + # Add variable name as attribute + var = file["element_variables_$v"] + attributes(var)["name"] = string(key) + end end - end - return filename + return filename end - function save_solution_file(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, - solution_callback, element_variables=Dict{Symbol,Any}(); - system="") - - @unpack output_directory, solution_variables = solution_callback - - # Filename based on current time step - if isempty(system) - filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) - else - filename = joinpath(output_directory, @sprintf("solution_%s_%06d.h5", system, timestep)) - end - - # Convert to different set of variables if requested - if solution_variables === cons2cons - data = u - n_vars = nvariables(equations) - else - # Reinterpret the solution array as an array of conservative variables, - # compute the solution variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - data = Array(reinterpret(eltype(u), - solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), - Ref(equations)))) - - # Find out variable count by looking at output from `solution_variables` function - n_vars = size(data, 1) - end - - if HDF5.has_parallel() - save_solution_file_parallel(data, time, dt, timestep, n_vars, mesh, equations, dg, cache, solution_variables, filename, element_variables) - else - save_solution_file_on_root(data, time, dt, timestep, n_vars, mesh, equations, dg, cache, solution_variables, filename, element_variables) - end -end + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, + dg::DG, cache, + solution_callback, element_variables = Dict{Symbol, Any}(); + system = "") + @unpack output_directory, solution_variables = solution_callback + + # Filename based on current time step + if isempty(system) + 
filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) + else + filename = joinpath(output_directory, + @sprintf("solution_%s_%06d.h5", system, timestep)) + end + # Convert to different set of variables if requested + if solution_variables === cons2cons + data = u + n_vars = nvariables(equations) + else + # Reinterpret the solution array as an array of conservative variables, + # compute the solution variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(u), + solution_variables.(reinterpret(SVector{ + nvariables(equations), + eltype(u)}, u), + Ref(equations)))) + + # Find out variable count by looking at output from `solution_variables` function + n_vars = size(data, 1) + end -function save_solution_file_parallel(data, time, dt, timestep, n_vars, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, - solution_variables, filename, element_variables=Dict{Symbol,Any}()) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = cache.mpi_cache.n_elements_by_rank - node_counts = element_counts * element_size - # Cumulative sum of elements per rank starting with an additional 0 - cum_element_counts = append!(zeros(eltype(element_counts), 1), cumsum(element_counts)) - # Cumulative sum of nodes per rank starting with an additional 0 - cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) - - # Open file using parallel HDF5 (clobber existing content) - h5open(filename, "w", mpi_comm()) do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelementsglobal(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution data - for v in 1:n_vars - # Need to create dataset explicitly in parallel case - var = create_dataset(file, "/variables_$v", datatype(eltype(data)), dataspace((ndofsglobal(mesh, dg, cache),))) - # Write data of each process in slices (ranks start with 0) - slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] - # Convert to 1D array - var[slice] = vec(data[v, .., :]) - # Add variable name as attribute - attributes(var)["name"] = varnames(solution_variables, equations)[v] + if HDF5.has_parallel() + save_solution_file_parallel(data, time, dt, timestep, n_vars, mesh, equations, + dg, cache, solution_variables, filename, + element_variables) + else + save_solution_file_on_root(data, time, dt, timestep, n_vars, mesh, equations, + dg, cache, solution_variables, filename, + element_variables) end +end - # Store element variables - for (v, (key, element_variable)) in enumerate(element_variables) - # Need to create dataset explicitly in parallel case - var = create_dataset(file, "/element_variables_$v", datatype(eltype(element_variable)), dataspace((nelementsglobal(dg, cache),))) - - # Write data of each process in slices (ranks start with 0) - slice = (cum_element_counts[mpi_rank() + 1] + 
1):cum_element_counts[mpi_rank() + 2] - # Add to file - var[slice] = element_variable - # Add variable name as attribute - attributes(var)["name"] = string(key) +function save_solution_file_parallel(data, time, dt, timestep, n_vars, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, + solution_variables, filename, + element_variables = Dict{Symbol, Any}()) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = cache.mpi_cache.n_elements_by_rank + node_counts = element_counts * element_size + # Cumulative sum of elements per rank starting with an additional 0 + cum_element_counts = append!(zeros(eltype(element_counts), 1), + cumsum(element_counts)) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # Open file using parallel HDF5 (clobber existing content) + h5open(filename, "w", mpi_comm()) do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_vars + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution data + for v in 1:n_vars + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/variables_$v", datatype(eltype(data)), + dataspace((ndofsglobal(mesh, dg, cache),))) + # Write data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert to 1D array + var[slice] = vec(data[v, .., :]) + # Add variable name as attribute + attributes(var)["name"] = varnames(solution_variables, equations)[v] + end + + # Store element variables + for (v, (key, element_variable)) in enumerate(element_variables) + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/element_variables_$v", + datatype(eltype(element_variable)), + dataspace((nelementsglobal(dg, cache),))) + + # Write data of each process in slices (ranks start with 0) + slice = (cum_element_counts[mpi_rank() + 1] + 1):cum_element_counts[mpi_rank() + 2] + # Add to file + var[slice] = element_variable + # Add variable name as attribute + attributes(var)["name"] = string(key) + end end - end - return filename + return filename end - function save_solution_file_on_root(data, time, dt, timestep, n_vars, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, - solution_variables, filename, element_variables=Dict{Symbol,Any}()) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - - # non-root ranks only send data - if !mpi_isroot() - # Send nodal data to root - for v in 1:n_vars - MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, + solution_variables, 
filename, + element_variables = Dict{Symbol, Any}()) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + + # non-root ranks only send data + if !mpi_isroot() + # Send nodal data to root + for v in 1:n_vars + MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) + end + + # Send element data to root + for (key, element_variable) in element_variables + MPI.Gatherv!(element_variable, nothing, mpi_root(), mpi_comm()) + end + + return filename end - # Send element data to root - for (key, element_variable) in element_variables - MPI.Gatherv!(element_variable, nothing, mpi_root(), mpi_comm()) + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_vars + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution data + for v in 1:n_vars + # Convert to 1D array + recv = Vector{eltype(data)}(undef, sum(node_counts)) + MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), + mpi_root(), mpi_comm()) + file["variables_$v"] = recv + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(solution_variables, equations)[v] + end + + # Store element variables + for (v, (key, element_variable)) in enumerate(element_variables) + # Add to file + recv = Vector{eltype(data)}(undef, sum(element_counts)) + MPI.Gatherv!(element_variable, MPI.VBuffer(recv, element_counts), + mpi_root(), mpi_comm()) + file["element_variables_$v"] = recv + + # Add variable name as attribute + var = file["element_variables_$v"] + attributes(var)["name"] = string(key) + end end return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelementsglobal(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution data - for v in 1:n_vars - # Convert to 1D array - recv = Vector{eltype(data)}(undef, sum(node_counts)) - MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), mpi_root(), mpi_comm()) - file["variables_$v"] = recv - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(solution_variables, equations)[v] - end - - # Store element 
variables - for (v, (key, element_variable)) in enumerate(element_variables) - # Add to file - recv = Vector{eltype(data)}(undef, sum(element_counts)) - MPI.Gatherv!(element_variable, MPI.VBuffer(recv, element_counts), mpi_root(), mpi_comm()) - file["element_variables_$v"] = recv - - # Add variable name as attribute - var = file["element_variables_$v"] - attributes(var)["name"] = string(key) - end - end - - return filename end - - end # @muladd diff --git a/src/callbacks_step/steady_state.jl b/src/callbacks_step/steady_state.jl index 66d04fea704..15c2e834285 100644 --- a/src/callbacks_step/steady_state.jl +++ b/src/callbacks_step/steady_state.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SteadyStateCallback(; abstol=1.0e-8, reltol=1.0e-6) @@ -11,74 +11,71 @@ Terminates the integration when the [`residual_steady_state(du, equations)`](@ref) falls below the threshold specified by `abstol, reltol`. """ -mutable struct SteadyStateCallback{RealT<:Real} - abstol::RealT - reltol::RealT +mutable struct SteadyStateCallback{RealT <: Real} + abstol::RealT + reltol::RealT end -function SteadyStateCallback(; abstol=1.0e-8, reltol=1.0e-6) - abstol, reltol = promote(abstol, reltol) - steady_state_callback = SteadyStateCallback(abstol, reltol) +function SteadyStateCallback(; abstol = 1.0e-8, reltol = 1.0e-6) + abstol, reltol = promote(abstol, reltol) + steady_state_callback = SteadyStateCallback(abstol, reltol) - DiscreteCallback(steady_state_callback, steady_state_callback, - save_positions=(false,false)) + DiscreteCallback(steady_state_callback, steady_state_callback, + save_positions = (false, false)) end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SteadyStateCallback}) - @nospecialize cb # reduce precompilation time - - steady_state_callback = cb.affect! - print(io, "SteadyStateCallback(abstol=", steady_state_callback.abstol, ", ", - "reltol=", steady_state_callback.reltol, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:SteadyStateCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else steady_state_callback = cb.affect! - - setup = [ - "absolute tolerance" => steady_state_callback.abstol, - "relative tolerance" => steady_state_callback.reltol, - ] - summary_box(io, "SteadyStateCallback", setup) - end + print(io, "SteadyStateCallback(abstol=", steady_state_callback.abstol, ", ", + "reltol=", steady_state_callback.reltol, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SteadyStateCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + steady_state_callback = cb.affect! + + setup = [ + "absolute tolerance" => steady_state_callback.abstol, + "relative tolerance" => steady_state_callback.reltol, + ] + summary_box(io, "SteadyStateCallback", setup) + end +end # affect! 
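+# The affect! simply terminates the time integration once the condition
+# below has detected a steady state.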
(::SteadyStateCallback)(integrator) = terminate!(integrator) - # the condition function (steady_state_callback::SteadyStateCallback)(u_ode, t, integrator) - semi = integrator.p - - u = wrap_array(u_ode, semi) - du = wrap_array(get_du(integrator), semi) - terminate = steady_state_callback(du, u, semi) - if mpi_isparallel() - # MPI.jl doesn't seem to have MPI_C_BOOL - terminate_integer = Int(terminate) - terminate = !iszero(MPI.Allreduce!(Ref(terminate_integer), +, mpi_comm())[]) - end - if mpi_isroot() && terminate - @info " Steady state tolerance reached" steady_state_callback t - end - return terminate + semi = integrator.p + + u = wrap_array(u_ode, semi) + du = wrap_array(get_du(integrator), semi) + terminate = steady_state_callback(du, u, semi) + if mpi_isparallel() + # MPI.jl doesn't seem to have MPI_C_BOOL + terminate_integer = Int(terminate) + terminate = !iszero(MPI.Allreduce!(Ref(terminate_integer), +, mpi_comm())[]) + end + if mpi_isroot() && terminate + @info " Steady state tolerance reached" steady_state_callback t + end + return terminate end -function (steady_state_callback::SteadyStateCallback)(du, u, semi::AbstractSemidiscretization) - steady_state_callback(du, u, mesh_equations_solver_cache(semi)...) +function (steady_state_callback::SteadyStateCallback)(du, u, + semi::AbstractSemidiscretization) + steady_state_callback(du, u, mesh_equations_solver_cache(semi)...) end include("steady_state_dg1d.jl") include("steady_state_dg2d.jl") include("steady_state_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/steady_state_dg1d.jl b/src/callbacks_step/steady_state_dg1d.jl index 65951f95d82..9b895de06d5 100644 --- a/src/callbacks_step/steady_state_dg1d.jl +++ b/src/callbacks_step/steady_state_dg1d.jl @@ -3,24 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function (steady_state_callback::SteadyStateCallback)(du, u, mesh::AbstractMesh{1}, equations, dg::DG, cache) - @unpack abstol, reltol = steady_state_callback + @unpack abstol, reltol = steady_state_callback - terminate = true - for element in eachelement(dg, cache) - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - du_local = get_node_vars(du, equations, dg, i, element) - threshold = abstol + reltol * residual_steady_state(u_local, equations) - terminate = terminate && residual_steady_state(du_local, equations) <= threshold + terminate = true + for element in eachelement(dg, cache) + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + du_local = get_node_vars(du, equations, dg, i, element) + threshold = abstol + reltol * residual_steady_state(u_local, equations) + terminate = terminate && + residual_steady_state(du_local, equations) <= threshold + end end - end - return terminate + return terminate end - - end # @muladd diff --git a/src/callbacks_step/steady_state_dg2d.jl b/src/callbacks_step/steady_state_dg2d.jl index 4837e77899d..ebb48ce4581 100644 --- a/src/callbacks_step/steady_state_dg2d.jl +++ b/src/callbacks_step/steady_state_dg2d.jl @@ -3,24 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function (steady_state_callback::SteadyStateCallback)(du, u, mesh::AbstractMesh{2}, equations, dg::DG, cache) - @unpack abstol, reltol = steady_state_callback + @unpack abstol, reltol = steady_state_callback - terminate = true - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - du_local = get_node_vars(du, equations, dg, i, j, element) - threshold = abstol + reltol * residual_steady_state(u_local, equations) - terminate = terminate && residual_steady_state(du_local, equations) <= threshold + terminate = true + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + du_local = get_node_vars(du, equations, dg, i, j, element) + threshold = abstol + reltol * residual_steady_state(u_local, equations) + terminate = terminate && + residual_steady_state(du_local, equations) <= threshold + end end - end - return terminate + return terminate end - - end # @muladd diff --git a/src/callbacks_step/steady_state_dg3d.jl b/src/callbacks_step/steady_state_dg3d.jl index d154d5e956d..69c172f9636 100644 --- a/src/callbacks_step/steady_state_dg3d.jl +++ b/src/callbacks_step/steady_state_dg3d.jl @@ -3,24 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function (steady_state_callback::SteadyStateCallback)(du, u, mesh::AbstractMesh{3}, equations, dg::DG, cache) - @unpack abstol, reltol = steady_state_callback + @unpack abstol, reltol = steady_state_callback - terminate = true - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - du_local = get_node_vars(du, equations, dg, i, j, k, element) - threshold = abstol + reltol * residual_steady_state(u_local, equations) - terminate = terminate && residual_steady_state(du_local, equations) <= threshold + terminate = true + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + du_local = get_node_vars(du, equations, dg, i, j, k, element) + threshold = abstol + reltol * residual_steady_state(u_local, equations) + terminate = terminate && + residual_steady_state(du_local, equations) <= threshold + end end - end - return terminate + return terminate end - - end # @muladd diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 13e4f9dfa54..9e9f2d4885b 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ StepsizeCallback(; cfl=1.0) @@ -12,100 +12,98 @@ Set the time step size according to a CFL condition with CFL number `cfl` if the time integration method isn't adaptive itself. """ mutable struct StepsizeCallback{RealT} - cfl_number::RealT + cfl_number::RealT end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:StepsizeCallback}) - @nospecialize cb # reduce precompilation time - - stepsize_callback = cb.affect! 
- @unpack cfl_number = stepsize_callback - print(io, "StepsizeCallback(cfl_number=", cfl_number, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:StepsizeCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else stepsize_callback = cb.affect! - - setup = [ - "CFL number" => stepsize_callback.cfl_number, - ] - summary_box(io, "StepsizeCallback", setup) - end + @unpack cfl_number = stepsize_callback + print(io, "StepsizeCallback(cfl_number=", cfl_number, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:StepsizeCallback}) + @nospecialize cb # reduce precompilation time -function StepsizeCallback(; cfl::Real=1.0) - - stepsize_callback = StepsizeCallback(cfl) + if get(io, :compact, false) + show(io, cb) + else + stepsize_callback = cb.affect! - DiscreteCallback(stepsize_callback, stepsize_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + setup = [ + "CFL number" => stepsize_callback.cfl_number, + ] + summary_box(io, "StepsizeCallback", setup) + end end +function StepsizeCallback(; cfl::Real = 1.0) + stepsize_callback = StepsizeCallback(cfl) -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:StepsizeCallback} - cb.affect!(integrator) + DiscreteCallback(stepsize_callback, stepsize_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: StepsizeCallback} + cb.affect!(integrator) +end # this method is called to determine whether the callback should be activated function (stepsize_callback::StepsizeCallback)(u, t, integrator) - return true + return true end - # This method is called as callback during the time integration. @inline function (stepsize_callback::StepsizeCallback)(integrator) - # TODO: Taal decide, shall we set the time step even if the integrator is adaptive? - if !integrator.opts.adaptive - t = integrator.t - u_ode = integrator.u - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack cfl_number = stepsize_callback - u = wrap_array(u_ode, mesh, equations, solver, cache) - - dt = @trixi_timeit timer() "calculate dt" cfl_number * max_dt(u, t, mesh, - have_constant_speed(equations), equations, - solver, cache) - set_proposed_dt!(integrator, dt) - integrator.opts.dtmax = dt - integrator.dtcache = dt - end - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # TODO: Taal decide, shall we set the time step even if the integrator is adaptive? 
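+    # For non-adaptive time integration methods, the step size is derived
+    # from the CFL condition, dt = cfl_number * max_dt(u, t, mesh, ...),
+    # and enforced via the proposed dt, dtmax, and dtcache of the integrator.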
+ if !integrator.opts.adaptive + t = integrator.t + u_ode = integrator.u + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack cfl_number = stepsize_callback + u = wrap_array(u_ode, mesh, equations, solver, cache) + + dt = @trixi_timeit timer() "calculate dt" begin + cfl_number * max_dt(u, t, mesh, have_constant_speed(equations), equations, + solver, cache) + end + + set_proposed_dt!(integrator, dt) + integrator.opts.dtmax = dt + integrator.dtcache = dt + end + + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - # Time integration methods from the DiffEq ecosystem without adaptive time stepping on their own # such as `CarpenterKennedy2N54` require passing `dt=...` in `solve(ode, ...)`. Since we don't have # an integrator at this stage but only the ODE, this method will be used there. It's called in # many examples in `solve(ode, ..., dt=stepsize_callback(ode), ...)`. -function (cb::DiscreteCallback{Condition,Affect!})(ode::ODEProblem) where {Condition, Affect!<:StepsizeCallback} - stepsize_callback = cb.affect! - @unpack cfl_number = stepsize_callback - u_ode = ode.u0 - t = first(ode.tspan) - semi = ode.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - - return cfl_number * max_dt(u, t, mesh, have_constant_speed(equations), equations, solver, cache) -end +function (cb::DiscreteCallback{Condition, Affect!})(ode::ODEProblem) where {Condition, + Affect! <: + StepsizeCallback + } + stepsize_callback = cb.affect! + @unpack cfl_number = stepsize_callback + u_ode = ode.u0 + t = first(ode.tspan) + semi = ode.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + return cfl_number * + max_dt(u, t, mesh, have_constant_speed(equations), equations, solver, cache) +end include("stepsize_dg1d.jl") include("stepsize_dg2d.jl") include("stepsize_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl index 0cb9932335d..edc25ec78f6 100644 --- a/src/callbacks_step/stepsize_dg1d.jl +++ b/src/callbacks_step/stepsize_dg1d.jl @@ -3,87 +3,82 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function max_dt(u, t, mesh::TreeMesh{1}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1 = zero(max_scaled_speed) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - λ1, = max_abs_speeds(u_node, equations) - max_λ1 = max(max_λ1, λ1) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1 = zero(max_scaled_speed) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + lambda1, = max_abs_speeds(u_node, equations) + max_lambda1 = max(max_lambda1, lambda1) + end + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_lambda1) end - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_λ1) - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::TreeMesh{1}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1, = max_abs_speeds(equations) - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_λ1) - end + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1, = max_abs_speeds(equations) + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_lambda1) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::StructuredMesh{1}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) - for element in eachelement(dg, cache) - max_λ1 = zero(max_scaled_speed) + for element in eachelement(dg, cache) + max_lambda1 = zero(max_scaled_speed) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - λ1, = max_abs_speeds(u_node, equations) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + lambda1, = max_abs_speeds(u_node, equations) - inv_jacobian = cache.elements.inverse_jacobian[i, element] + inv_jacobian = cache.elements.inverse_jacobian[i, element] - max_λ1 = max(max_λ1, inv_jacobian * λ1) - end + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1) + end - max_scaled_speed = max(max_scaled_speed, max_λ1) - end + max_scaled_speed = max(max_scaled_speed, max_lambda1) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::StructuredMesh{1}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) - for element in eachelement(dg, cache) - max_λ1, = max_abs_speeds(equations) + for element in eachelement(dg, cache) + max_lambda1, = max_abs_speeds(equations) - for i in eachnode(dg) - inv_jacobian = cache.elements.inverse_jacobian[i, element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_λ1) + for i in eachnode(dg) + inv_jacobian = cache.elements.inverse_jacobian[i, element] + max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_lambda1) + end end - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - - end # @muladd diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 3c7d288d8d2..89a2b2b8350 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -3,168 +3,171 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function max_dt(u, t, mesh::TreeMesh{2}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = zero(max_scaled_speed) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - λ1, λ2 = max_abs_speeds(u_node, equations) - max_λ1 = max(max_λ1, λ1) - max_λ2 = max(max_λ2, λ2) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = zero(max_scaled_speed) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + lambda1, lambda2 = max_abs_speeds(u_node, equations) + max_lambda1 = max(max_lambda1, lambda1) + max_lambda2 = max(max_lambda2, lambda2) + end + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2)) end - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2)) - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::TreeMesh{2}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1, max_λ2 = max_abs_speeds(equations) - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2)) - end + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1, max_lambda2 = max_abs_speeds(equations) + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2)) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::ParallelTreeMesh{2}, constant_speed::False, equations, dg::DG, cache) - # call the method accepting a general `mesh::TreeMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::TreeMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), TreeMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::ParallelTreeMesh{2}, constant_speed::True, equations, dg::DG, cache) - # call the method accepting a general `mesh::TreeMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::TreeMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), TreeMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors, inverse_jacobian = cache.elements + @unpack contravariant_vectors, inverse_jacobian = cache.elements - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = zero(max_scaled_speed) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - λ1, λ2 = max_abs_speeds(u_node, equations) + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = zero(max_scaled_speed) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + lambda1, lambda2 = max_abs_speeds(u_node, equations) - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - λ1_transformed = abs(Ja11 * λ1 + Ja12 * λ2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - λ2_transformed = abs(Ja21 * λ1 + Ja22 * λ2) + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) - inv_jacobian = abs(inverse_jacobian[i, j, element]) + inv_jacobian = abs(inverse_jacobian[i, j, element]) - max_λ1 = max(max_λ1, λ1_transformed * inv_jacobian) - max_λ2 = max(max_λ2, λ2_transformed * inv_jacobian) - end + max_lambda1 = max(max_lambda1, lambda1_transformed * inv_jacobian) + max_lambda2 = max(max_lambda2, lambda2_transformed * inv_jacobian) + end - max_scaled_speed = max(max_scaled_speed, max_λ1 + max_λ2) - end + max_scaled_speed = max(max_scaled_speed, max_lambda1 + max_lambda2) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, constant_speed::True, equations, dg::DG, cache) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - max_λ1, max_λ2 = max_abs_speeds(equations) - - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - λ1_transformed = abs(Ja11 * max_λ1 + Ja12 * max_λ2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - λ2_transformed = abs(Ja21 * max_λ1 + Ja22 * max_λ2) - - inv_jacobian = abs(inverse_jacobian[i, j, element]) - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (λ1_transformed + λ2_transformed)) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + max_lambda1, max_lambda2 = max_abs_speeds(equations) + + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) + + inv_jacobian = abs(inverse_jacobian[i, j, element]) + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * + (lambda1_transformed + lambda2_transformed)) + end end - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::ParallelP4estMesh{2}, constant_speed::False, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::ParallelP4estMesh{2}, constant_speed::True, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - - end # @muladd diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 492ee3d9a08..c9ab7c478a8 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -3,142 +3,151 @@ # we need to opt-in explicitly. 
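# A minimal, self-contained sketch of the CFL-type estimate that all `max_dt`
# methods in these files implement: per element, the maximal characteristic
# speeds are scaled by the inverse Jacobian (on curved meshes after being
# transformed to the reference element via the contravariant vectors), and the
# admissible step size is `2 / (nnodes(dg) * max_scaled_speed)`, which the
# `StepsizeCallback` then multiplies by the user-chosen CFL number. Seeding the
# reduction with `nextfloat(zero(t))` keeps the final division well defined when
# every speed vanishes, e.g. for steady-state linear advection.
# `max_dt_sketch` and `speeds` below are illustrative stand-ins, not Trixi.jl API.
function max_dt_sketch(u_nodes, inverse_jacobians, n_nodes; speeds = abs)
    max_scaled_speed = nextfloat(0.0)  # avoids division by zero below
    for (u, inv_jac) in zip(u_nodes, inverse_jacobians)
        max_scaled_speed = max(max_scaled_speed, inv_jac * speeds(u))
    end
    return 2 / (n_nodes * max_scaled_speed)
end

# Example: speeds up to 2.0 on elements with inverse Jacobian 0.5 yield
# dt = 2 / (4 * 1.0) = 0.5 before scaling by the CFL number.
dt = max_dt_sketch([0.5, -2.0, 1.0, 0.25], fill(0.5, 4), 4)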
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function max_dt(u, t, mesh::TreeMesh{3}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = max_λ3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - λ1, λ2, λ3 = max_abs_speeds(u_node, equations) - max_λ1 = max(max_λ1, λ1) - max_λ2 = max(max_λ2, λ2) - max_λ3 = max(max_λ3, λ3) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + max_lambda1 = max(max_lambda1, lambda1) + max_lambda2 = max(max_lambda2, lambda2) + max_lambda3 = max(max_lambda3, lambda3) + end + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2 + max_lambda3)) end - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2 + max_λ3)) - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::TreeMesh{3}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1, max_λ2, max_λ3 = max_abs_speeds(equations) - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2 + max_λ3)) - end + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2 + max_lambda3)) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. 
for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - @unpack contravariant_vectors = cache.elements - - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = max_λ3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - λ1, λ2, λ3 = max_abs_speeds(u_node, equations) - - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - λ1_transformed = abs(Ja11 * λ1 + Ja12 * λ2 + Ja13 * λ3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - λ2_transformed = abs(Ja21 * λ1 + Ja22 * λ2 + Ja23 * λ3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - λ3_transformed = abs(Ja31 * λ1 + Ja32 * λ2 + Ja33 * λ3) - - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) - - max_λ1 = max(max_λ1, inv_jacobian * λ1_transformed) - max_λ2 = max(max_λ2, inv_jacobian * λ2_transformed) - max_λ3 = max(max_λ3, inv_jacobian * λ3_transformed) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + @unpack contravariant_vectors = cache.elements + + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + k, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + k, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + k, element) + lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + + inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) + max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) + max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) + end + + max_scaled_speed = max(max_scaled_speed, + max_lambda1 + max_lambda2 + max_lambda3) end - max_scaled_speed = max(max_scaled_speed, max_λ1 + max_λ2 + max_λ3) - end - - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. 
for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - @unpack contravariant_vectors = cache.elements - - max_λ1, max_λ2, max_λ3 = max_abs_speeds(equations) - - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - λ1_transformed = abs(Ja11 * max_λ1 + Ja12 * max_λ2 + Ja13 * max_λ3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - λ2_transformed = abs(Ja21 * max_λ1 + Ja22 * max_λ2 + Ja23 * max_λ3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - λ3_transformed = abs(Ja31 * max_λ1 + Ja32 * max_λ2 + Ja33 * max_λ3) - - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) - - max_scaled_speed = max(max_scaled_speed, - inv_jacobian * (λ1_transformed + λ2_transformed + λ3_transformed)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + @unpack contravariant_vectors = cache.elements + + max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) + + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + k, element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 + + Ja13 * max_lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + k, element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 + + Ja23 * max_lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + k, element) + lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + + Ja33 * max_lambda3) + + inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * + (lambda1_transformed + lambda2_transformed + + lambda3_transformed)) + end end - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::ParallelP4estMesh{3}, constant_speed::False, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{3}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{3}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. 
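# A hedged sketch of the parallel pattern used by these `ParallelP4estMesh` and
# `ParallelTreeMesh` methods: every rank computes its local admissible step size
# with the serial method (reached via `invoke` on the general mesh type), and the
# global minimum over all ranks is taken, since all ranks must advance with the
# same `dt`. With plain MPI.jl, assuming an initialized MPI environment and
# `local_dt` as a stand-in for the value returned by the serial method:
using MPI
MPI.Init()
local_dt = 0.1 + 0.01 * MPI.Comm_rank(MPI.COMM_WORLD)  # in general rank-dependent
global_dt = MPI.Allreduce(local_dt, min, MPI.COMM_WORLD)  # identical on all ranks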
+ dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{3}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::ParallelP4estMesh{3}, constant_speed::True, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{3}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{3}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{3}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - - end # @muladd diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index 37428f49651..a73b2a1913b 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -3,12 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent summary_callback(u, t, integrator) = false # when used as condition; never call the summary callback during the simulation summary_callback(integrator) = u_modified!(integrator, false) # the summary callback does nothing when called accidentally - """ SummaryCallback() @@ -17,211 +16,208 @@ beginning of a simulation and then resets the timer. When the returned callback directly, the current timer values are shown. 
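When used in an elixir, the callback is typically created once, passed to the
solver together with the other callbacks, and called directly after the run to
print the collected timer values, e.g. (a sketch of the common pattern, with the
solve call elided):

    using Trixi
    summary_callback = SummaryCallback()
    # ... pass `summary_callback` to `solve` via a `CallbackSet` ...
    summary_callback()  # print the current timer values to `stdout`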
""" function SummaryCallback() - DiscreteCallback(summary_callback, summary_callback, - save_positions=(false,false), - initialize=initialize_summary_callback) + DiscreteCallback(summary_callback, summary_callback, + save_positions = (false, false), + initialize = initialize_summary_callback) end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:typeof(summary_callback)}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - print(io, "SummaryCallback") + print(io, "SummaryCallback") end - # Format a key/value pair for output from the SummaryCallback -function format_key_value_line(key::AbstractString, value::AbstractString, key_width, total_width; - indentation_level=0, guide='…', filler='…', prefix="│ ", suffix=" │") - @assert key_width < total_width - line = prefix - # Indent the key as requested (or not at all if `indentation_level == 0`) - indentation = prefix^indentation_level - reduced_key_width = key_width - length(indentation) - squeezed_key = indentation * squeeze(key, reduced_key_width, filler=filler) - line *= squeezed_key - line *= ": " - short = key_width - length(squeezed_key) - if short <= 1 - line *= " " - else - line *= guide^(short-1) * " " - end - value_width = total_width - length(prefix) - length(suffix) - key_width - 2 - squeezed_value = squeeze(value, value_width, filler=filler) - line *= squeezed_value - short = value_width - length(squeezed_value) - line *= " "^short - line *= suffix - - @assert length(line) == total_width "should not happen: algorithm error!" - - return line +function format_key_value_line(key::AbstractString, value::AbstractString, key_width, + total_width; + indentation_level = 0, guide = '…', filler = '…', + prefix = "│ ", suffix = " │") + @assert key_width < total_width + line = prefix + # Indent the key as requested (or not at all if `indentation_level == 0`) + indentation = prefix^indentation_level + reduced_key_width = key_width - length(indentation) + squeezed_key = indentation * squeeze(key, reduced_key_width, filler = filler) + line *= squeezed_key + line *= ": " + short = key_width - length(squeezed_key) + if short <= 1 + line *= " " + else + line *= guide^(short - 1) * " " + end + value_width = total_width - length(prefix) - length(suffix) - key_width - 2 + squeezed_value = squeeze(value, value_width, filler = filler) + line *= squeezed_value + short = value_width - length(squeezed_value) + line *= " "^short + line *= suffix + + @assert length(line)==total_width "should not happen: algorithm error!" + + return line +end +function format_key_value_line(key, value, args...; kwargs...) + format_key_value_line(string(key), string(value), args...; kwargs...) end -format_key_value_line(key, value, args...; kwargs...) = format_key_value_line(string(key), string(value), args...; kwargs...) # Squeeze a string to fit into a maximum width by deleting characters from the center -function squeeze(message, max_width; filler::Char='…') - @assert max_width >= 3 "squeezing works only for a minimum `max_width` of 3" +function squeeze(message, max_width; filler::Char = '…') + @assert max_width>=3 "squeezing works only for a minimum `max_width` of 3" - length(message) <= max_width && return message + length(message) <= max_width && return message - keep_front = div(max_width, 2) - keep_back = div(max_width, 2) - (isodd(max_width) ? 
0 : 1) - remove_back = length(message) - keep_front - remove_front = length(message) - keep_back - squeezed = (chop(message, head=0, tail=remove_back) - * filler * - chop(message, head=remove_front, tail=0)) + keep_front = div(max_width, 2) + keep_back = div(max_width, 2) - (isodd(max_width) ? 0 : 1) + remove_back = length(message) - keep_front + remove_front = length(message) - keep_back + squeezed = (chop(message, head = 0, tail = remove_back) + * filler * + chop(message, head = remove_front, tail = 0)) - @assert length(squeezed) == max_width "`$(length(squeezed)) != $max_width` should not happen: algorithm error!" + @assert length(squeezed)==max_width "`$(length(squeezed)) != $max_width` should not happen: algorithm error!" - return squeezed + return squeezed end # Print a summary with a box around it with a given heading and a setup of key=>value pairs -function summary_box(io::IO, heading, setup=[]) - summary_header(io, heading) - for (key, value) in setup - summary_line(io, key, value) - end - summary_footer(io) +function summary_box(io::IO, heading, setup = []) + summary_header(io, heading) + for (key, value) in setup + summary_line(io, key, value) + end + summary_footer(io) end -function summary_header(io, heading; total_width=100, indentation_level=0) - total_width = get(io, :total_width, total_width) - indentation_level = get(io, :indentation_level, indentation_level) +function summary_header(io, heading; total_width = 100, indentation_level = 0) + total_width = get(io, :total_width, total_width) + indentation_level = get(io, :indentation_level, indentation_level) - @assert indentation_level >= 0 "indentation level may not be negative" + @assert indentation_level>=0 "indentation level may not be negative" - # If indentation level is greater than zero, we assume the header has already been printed - indentation_level > 0 && return + # If indentation level is greater than zero, we assume the header has already been printed + indentation_level > 0 && return - # Print header - println(io, "┌" * "─"^(total_width-2) * "┐") - println(io, "│ " * heading * " "^(total_width - length(heading) - 4) * " │") - println(io, "│ " * "═"^length(heading) * " "^(total_width - length(heading) - 4) * " │") + # Print header + println(io, "┌" * "─"^(total_width - 2) * "┐") + println(io, "│ " * heading * " "^(total_width - length(heading) - 4) * " │") + println(io, + "│ " * "═"^length(heading) * " "^(total_width - length(heading) - 4) * " │") end -function summary_line(io, key, value; key_width=30, total_width=100, indentation_level=0) - # Printing is not performance-critical, so we can use `@nospecialize` to reduce latency - @nospecialize value # reduce precompilation time +function summary_line(io, key, value; key_width = 30, total_width = 100, + indentation_level = 0) + # Printing is not performance-critical, so we can use `@nospecialize` to reduce latency + @nospecialize value # reduce precompilation time - key_width = get(io, :key_width, key_width) - total_width = get(io, :total_width, total_width) - indentation_level = get(io, :indentation_level, indentation_level) + key_width = get(io, :key_width, key_width) + total_width = get(io, :total_width, total_width) + indentation_level = get(io, :indentation_level, indentation_level) - s = format_key_value_line(key, value, key_width, total_width, - indentation_level=indentation_level) + s = format_key_value_line(key, value, key_width, total_width, + indentation_level = indentation_level) - println(io, s) + println(io, s) end -function summary_footer(io; 
total_width=100, indentation_level=0) - total_width = get(io, :total_width, 100) - indentation_level = get(io, :indentation_level, 0) +function summary_footer(io; total_width = 100, indentation_level = 0) + total_width = get(io, :total_width, 100) + indentation_level = get(io, :indentation_level, 0) - if indentation_level == 0 - s = "└" * "─"^(total_width-2) * "┘" - else - s = "" - end + if indentation_level == 0 + s = "└" * "─"^(total_width - 2) * "┘" + else + s = "" + end - print(io, s) + print(io, s) end -@inline increment_indent(io) = IOContext(io, :indentation_level => get(io, :indentation_level, 0) + 1) - +@inline function increment_indent(io) + IOContext(io, :indentation_level => get(io, :indentation_level, 0) + 1) +end # Print information about the current simulation setup # Note: This is called *after* all initialization is done, but *before* the first time step function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator) + mpi_isroot() || return nothing - mpi_isroot() || return nothing - - print_startup_message() - - io = stdout - io_context = IOContext(io, - :compact => false, - :key_width => 30, - :total_width => 100, - :indentation_level => 0) - - semi = integrator.p - show(io_context, MIME"text/plain"(), semi) - println(io, "\n") - mesh, equations, solver, _ = mesh_equations_solver_cache(semi) - show(io_context, MIME"text/plain"(), mesh) - println(io, "\n") - show(io_context, MIME"text/plain"(), equations) - println(io, "\n") - show(io_context, MIME"text/plain"(), solver) - println(io, "\n") - - callbacks = integrator.opts.callback - if callbacks isa CallbackSet - for cb in callbacks.continuous_callbacks - show(io_context, MIME"text/plain"(), cb) - println(io, "\n") - end - for cb in callbacks.discrete_callbacks - # Do not show ourselves - cb.affect! === summary_callback && continue + print_startup_message() - show(io_context, MIME"text/plain"(), cb) - println(io, "\n") - end - else - show(io_context, MIME"text/plain"(), callbacks) + io = stdout + io_context = IOContext(io, + :compact => false, + :key_width => 30, + :total_width => 100, + :indentation_level => 0) + + semi = integrator.p + show(io_context, MIME"text/plain"(), semi) + println(io, "\n") + mesh, equations, solver, _ = mesh_equations_solver_cache(semi) + show(io_context, MIME"text/plain"(), mesh) + println(io, "\n") + show(io_context, MIME"text/plain"(), equations) + println(io, "\n") + show(io_context, MIME"text/plain"(), solver) println(io, "\n") - end - - # time integration - setup = Pair{String,Any}[ - "Start time" => first(integrator.sol.prob.tspan), - "Final time" => last(integrator.sol.prob.tspan), - "time integrator" => integrator.alg |> typeof |> nameof, - "adaptive" => integrator.opts.adaptive, - ] - if integrator.opts.adaptive - push!(setup, - "abstol" => integrator.opts.abstol, - "reltol" => integrator.opts.reltol, - "controller" => integrator.opts.controller, - ) - end - summary_box(io, "Time integration", setup) - println() - - # technical details - setup = Pair{String,Any}[ - "#threads" => Threads.nthreads(), - ] - if mpi_isparallel() - push!(setup, - "#MPI ranks" => mpi_nranks(), - ) - end - summary_box(io, "Environment information", setup) - println() - - reset_timer!(timer()) - - return nothing -end + callbacks = integrator.opts.callback + if callbacks isa CallbackSet + for cb in callbacks.continuous_callbacks + show(io_context, MIME"text/plain"(), cb) + println(io, "\n") + end + for cb in callbacks.discrete_callbacks + # Do not show ourselves + cb.affect! 
=== summary_callback && continue + + show(io_context, MIME"text/plain"(), cb) + println(io, "\n") + end + else + show(io_context, MIME"text/plain"(), callbacks) + println(io, "\n") + end -function (cb::DiscreteCallback{Condition,Affect!})(io::IO=stdout) where {Condition, Affect!<:typeof(summary_callback)} + # time integration + setup = Pair{String, Any}["Start time" => first(integrator.sol.prob.tspan), + "Final time" => last(integrator.sol.prob.tspan), + "time integrator" => integrator.alg |> typeof |> nameof, + "adaptive" => integrator.opts.adaptive] + if integrator.opts.adaptive + push!(setup, + "abstol" => integrator.opts.abstol, + "reltol" => integrator.opts.reltol, + "controller" => integrator.opts.controller) + end + summary_box(io, "Time integration", setup) + println() + + # technical details + setup = Pair{String, Any}["#threads" => Threads.nthreads()] + if mpi_isparallel() + push!(setup, + "#MPI ranks" => mpi_nranks()) + end + summary_box(io, "Environment information", setup) + println() - mpi_isroot() || return nothing + reset_timer!(timer()) - TimerOutputs.complement!(timer()) - print_timer(io, timer(), title="Trixi.jl", - allocations=true, linechars=:unicode, compact=false) - println(io) - return nothing + return nothing end - +function (cb::DiscreteCallback{Condition, Affect!})(io::IO = stdout) where {Condition, + Affect! <: + typeof(summary_callback) + } + mpi_isroot() || return nothing + + TimerOutputs.complement!(timer()) + print_timer(io, timer(), title = "Trixi.jl", + allocations = true, linechars = :unicode, compact = false) + println(io) + return nothing +end end # @muladd diff --git a/src/callbacks_step/time_series.jl b/src/callbacks_step/time_series.jl index 01282fbb2c3..7baa6b9c5a1 100644 --- a/src/callbacks_step/time_series.jl +++ b/src/callbacks_step/time_series.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ TimeSeriesCallback(semi, point_coordinates; @@ -26,190 +26,192 @@ types used in the solver and the cache. !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -mutable struct TimeSeriesCallback{RealT<:Real, uEltype<:Real, SolutionVariables, VariableNames, Cache} - interval::Int - solution_variables::SolutionVariables - variable_names::VariableNames - output_directory::String - filename::String - point_coordinates::Array{RealT, 2} - # Point data is stored as a vector of vectors of the solution data type: - # * the "outer" `Vector` contains one vector for each point at which a time_series is recorded - # * the "inner" `Vector` contains the actual time series for a single point, - # with each record adding "n_vars" entries - # The reason for using this data structure is that the length of the inner vectors needs to be - # increased for each record, which can only be realized in Julia using ordinary `Vector`s. 
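# A small illustrative sketch (names and data are placeholders, not Trixi.jl
# API) of the growth pattern this layout enables: each record appends `n_vars`
# values to the inner vector of one point, and only when written to disk is the
# flat vector viewed as an `n_vars × n_records` matrix.
n_vars, n_points = 3, 2
point_data_sketch = [Float64[] for _ in 1:n_points]  # one growing vector per point
for record in 1:2, p in 1:n_points
    append!(point_data_sketch[p], record .* (1.0, 2.0, 3.0))  # n_vars new entries
end
@assert size(reshape(point_data_sketch[1], n_vars, :)) == (3, 2)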
- point_data::Vector{Vector{uEltype}} - time::Vector{RealT} - step::Vector{Int} - time_series_cache::Cache +mutable struct TimeSeriesCallback{RealT <: Real, uEltype <: Real, SolutionVariables, + VariableNames, Cache} + interval::Int + solution_variables::SolutionVariables + variable_names::VariableNames + output_directory::String + filename::String + point_coordinates::Array{RealT, 2} + # Point data is stored as a vector of vectors of the solution data type: + # * the "outer" `Vector` contains one vector for each point at which a time_series is recorded + # * the "inner" `Vector` contains the actual time series for a single point, + # with each record adding "n_vars" entries + # The reason for using this data structure is that the length of the inner vectors needs to be + # increased for each record, which can only be realized in Julia using ordinary `Vector`s. + point_data::Vector{Vector{uEltype}} + time::Vector{RealT} + step::Vector{Int} + time_series_cache::Cache end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}) - @nospecialize cb # reduce precompilation time - - time_series_callback = cb.affect! - @unpack interval, solution_variables, output_directory, filename = time_series_callback - print(io, "TimeSeriesCallback(", - "interval=", interval, ", ", - "solution_variables=", interval, ", ", - "output_directory=", "\"output_directory\"", ", ", - "filename=", "\"filename\"", - ")") -end + @nospecialize cb # reduce precompilation time -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else time_series_callback = cb.affect! - - setup = [ - "#points" => size(time_series_callback.point_coordinates, 2), - "interval" => time_series_callback.interval, - "solution_variables" => time_series_callback.solution_variables, - "output_directory" => time_series_callback.output_directory, - "filename" => time_series_callback.filename, - ] - summary_box(io, "TimeSeriesCallback", setup) - end + @unpack interval, solution_variables, output_directory, filename = time_series_callback + print(io, "TimeSeriesCallback(", + "interval=", interval, ", ", + "solution_variables=", interval, ", ", + "output_directory=", "\"output_directory\"", ", ", + "filename=", "\"filename\"", + ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + time_series_callback = cb.affect! 
+ + setup = [ + "#points" => size(time_series_callback.point_coordinates, 2), + "interval" => time_series_callback.interval, + "solution_variables" => time_series_callback.solution_variables, + "output_directory" => time_series_callback.output_directory, + "filename" => time_series_callback.filename, + ] + summary_box(io, "TimeSeriesCallback", setup) + end +end # Main constructor function TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates; - interval::Integer=1, - solution_variables=cons2cons, - output_directory="out", - filename="time_series.h5", - RealT=real(solver), - uEltype=eltype(cache.elements)) - # check arguments - if !(interval isa Integer && interval >= 0) - throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) - end - - if ndims(point_coordinates) != 2 || size(point_coordinates, 2) != ndims(mesh) - throw(ArgumentError("`point_coordinates` must be a matrix of size n_points × ndims")) - end - - # Transpose point_coordinates to our usual format [ndims, n_points] - # Note: They are accepted in a different format to allow direct input from `readdlm` - point_coordinates_ = permutedims(point_coordinates) - - # Invoke callback every `interval` time steps or after final step (for storing the data on disk) - if interval > 0 - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - condition = (u, t, integrator) -> ( (integrator.stats.naccept % interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - isfinished(integrator)) - else # disable the callback for interval == 0 - condition = (u, t, integrator) -> false - end - - # Create data structures that are to be filled by the callback - variable_names = varnames(solution_variables, equations) - n_points = size(point_coordinates_, 2) - point_data = Vector{uEltype}[Vector{uEltype}() for _ in 1:n_points] - time = Vector{RealT}() - step = Vector{Int}() - time_series_cache = create_cache_time_series(point_coordinates_, mesh, solver, cache) - - time_series_callback = TimeSeriesCallback(interval, - solution_variables, - variable_names, - output_directory, - filename, - point_coordinates_, - point_data, - time, - step, - time_series_cache) - - return DiscreteCallback(condition, time_series_callback, save_positions=(false,false)) -end + interval::Integer = 1, + solution_variables = cons2cons, + output_directory = "out", + filename = "time_series.h5", + RealT = real(solver), + uEltype = eltype(cache.elements)) + # check arguments + if !(interval isa Integer && interval >= 0) + throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) + end + if ndims(point_coordinates) != 2 || size(point_coordinates, 2) != ndims(mesh) + throw(ArgumentError("`point_coordinates` must be a matrix of size n_points × ndims")) + end + + # Transpose point_coordinates to our usual format [ndims, n_points] + # Note: They are accepted in a different format to allow direct input from `readdlm` + point_coordinates_ = permutedims(point_coordinates) + + # Invoke callback every `interval` time steps or after final step (for storing the data on disk) + if interval > 0 + # With error-based step size control, some steps can be rejected. 
Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + condition = (u, t, integrator) -> ((integrator.stats.naccept % interval == 0 && + !(integrator.stats.naccept == 0 && + integrator.iter > 0)) || + isfinished(integrator)) + else # disable the callback for interval == 0 + condition = (u, t, integrator) -> false + end + + # Create data structures that are to be filled by the callback + variable_names = varnames(solution_variables, equations) + n_points = size(point_coordinates_, 2) + point_data = Vector{uEltype}[Vector{uEltype}() for _ in 1:n_points] + time = Vector{RealT}() + step = Vector{Int}() + time_series_cache = create_cache_time_series(point_coordinates_, mesh, solver, + cache) + + time_series_callback = TimeSeriesCallback(interval, + solution_variables, + variable_names, + output_directory, + filename, + point_coordinates_, + point_data, + time, + step, + time_series_cache) + + return DiscreteCallback(condition, time_series_callback, + save_positions = (false, false)) +end # Convenience constructor that unpacks the semidiscretization into mesh, equations, solver, cache function TimeSeriesCallback(semi, point_coordinates; kwargs...) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates; kwargs...) + return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates; + kwargs...) end - # Convenience constructor that converts a vector of points into a Trixi.jl-style coordinate array -function TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates::AbstractVector; +function TimeSeriesCallback(mesh, equations, solver, cache, + point_coordinates::AbstractVector; kwargs...) - # Coordinates are usually stored in [ndims, n_points], but here as [n_points, ndims] - n_points = length(point_coordinates) - point_coordinates_ = Matrix{eltype(eltype(point_coordinates))}(undef, n_points, ndims(mesh)) - - for p in 1:n_points - for d in 1:ndims(mesh) - point_coordinates_[p, d] = point_coordinates[p][d] + # Coordinates are usually stored in [ndims, n_points], but here as [n_points, ndims] + n_points = length(point_coordinates) + point_coordinates_ = Matrix{eltype(eltype(point_coordinates))}(undef, n_points, + ndims(mesh)) + + for p in 1:n_points + for d in 1:ndims(mesh) + point_coordinates_[p, d] = point_coordinates[p][d] + end end - end - return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates_; kwargs...) + return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates_; + kwargs...) end - # This method is called as callback during the time integration. 
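# The activation condition built in the constructor above deliberately checks
# `integrator.stats.naccept` instead of `integrator.iter`: with error-based step
# size control, rejected steps increase `iter` but not `naccept`, and callbacks
# only fire after accepted steps. A stripped-down version of the same predicate
# (the real condition also receives `u` and `t`), using hypothetical stand-ins
# for the integrator fields it reads:
struct SketchStats
    naccept::Int
end
struct SketchIntegrator
    stats::SketchStats
    iter::Int
    finished::Bool
end
isfinished_sketch(integrator) = integrator.finished

interval = 5
condition = integrator -> ((integrator.stats.naccept % interval == 0 &&
                            !(integrator.stats.naccept == 0 && integrator.iter > 0)) ||
                           isfinished_sketch(integrator))

@assert condition(SketchIntegrator(SketchStats(10), 12, false))  # every 5th accepted step
@assert !condition(SketchIntegrator(SketchStats(0), 3, false))   # only rejected steps so far
@assert condition(SketchIntegrator(SketchStats(7), 7, true))     # always record at the end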
function (time_series_callback::TimeSeriesCallback)(integrator) - # Ensure this is not accidentally used with AMR enabled - if uses_amr(integrator.opts.callback) - error("the TimeSeriesCallback does not work with AMR enabled") - end - - @unpack interval = time_series_callback - - # Create record if in correct interval (needs to be checked since the callback is also called - # after the final step for storing the data on disk, independent of the current interval) - if integrator.stats.naccept % interval == 0 - @trixi_timeit timer() "time series" begin - # Store time and step - push!(time_series_callback.time, integrator.t) - push!(time_series_callback.step, integrator.stats.naccept) - - # Unpack data - u_ode = integrator.u - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - - @unpack (point_data, solution_variables, - variable_names, time_series_cache) = time_series_callback - - # Record state at points (solver/mesh-dependent implementation) - record_state_at_points!(point_data, u, solution_variables, length(variable_names), mesh, - equations, solver, time_series_cache) + # Ensure this is not accidentally used with AMR enabled + if uses_amr(integrator.opts.callback) + error("the TimeSeriesCallback does not work with AMR enabled") end - end - # Store time_series if this is the last time step - if isfinished(integrator) - semi = integrator.p - mesh, equations, solver, _ = mesh_equations_solver_cache(semi) - save_time_series_file(time_series_callback, mesh, equations, solver) - end + @unpack interval = time_series_callback + + # Create record if in correct interval (needs to be checked since the callback is also called + # after the final step for storing the data on disk, independent of the current interval) + if integrator.stats.naccept % interval == 0 + @trixi_timeit timer() "time series" begin + # Store time and step + push!(time_series_callback.time, integrator.t) + push!(time_series_callback.step, integrator.stats.naccept) + + # Unpack data + u_ode = integrator.u + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + + @unpack (point_data, solution_variables, + variable_names, time_series_cache) = time_series_callback + + # Record state at points (solver/mesh-dependent implementation) + record_state_at_points!(point_data, u, solution_variables, + length(variable_names), mesh, + equations, solver, time_series_cache) + end + end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) + # Store time_series if this is the last time step + if isfinished(integrator) + semi = integrator.p + mesh, equations, solver, _ = mesh_equations_solver_cache(semi) + save_time_series_file(time_series_callback, mesh, equations, solver) + end - return nothing -end + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing +end include("time_series_dg.jl") include("time_series_dg2d.jl") - - end # @muladd diff --git a/src/callbacks_step/time_series_dg.jl b/src/callbacks_step/time_series_dg.jl index 3a383fa1fd4..1b63979d579 100644 --- a/src/callbacks_step/time_series_dg.jl +++ b/src/callbacks_step/time_series_dg.jl @@ -3,35 +3,33 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Store time series file for a TreeMesh with a DG solver function save_time_series_file(time_series_callback, mesh::TreeMesh, equations, dg::DG) - @unpack (interval, solution_variables, variable_names, - output_directory, filename, point_coordinates, - point_data, time, step, time_series_cache) = time_series_callback - n_points = length(point_data) + @unpack (interval, solution_variables, variable_names, + output_directory, filename, point_coordinates, + point_data, time, step, time_series_cache) = time_series_callback + n_points = length(point_data) - h5open(joinpath(output_directory, filename), "w") do file - # Add context information as attributes - n_variables = length(variable_names) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_variables - attributes(file)["n_points"] = n_points - attributes(file)["interval"] = interval - attributes(file)["variable_names"] = collect(variable_names) + h5open(joinpath(output_directory, filename), "w") do file + # Add context information as attributes + n_variables = length(variable_names) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_variables + attributes(file)["n_points"] = n_points + attributes(file)["interval"] = interval + attributes(file)["variable_names"] = collect(variable_names) - file["time"] = time - file["timestep"] = step - file["point_coordinates"] = point_coordinates - for p in 1:n_points - # Store data as 2D array for convenience - file["point_data_$p"] = reshape(point_data[p], n_variables, length(time)) + file["time"] = time + file["timestep"] = step + file["point_coordinates"] = point_coordinates + for p in 1:n_points + # Store data as 2D array for convenience + file["point_data_$p"] = reshape(point_data[p], n_variables, length(time)) + end end - end end - - end # @muladd diff --git a/src/callbacks_step/time_series_dg2d.jl b/src/callbacks_step/time_series_dg2d.jl index 778739a824b..c15945d6e16 100644 --- a/src/callbacks_step/time_series_dg2d.jl +++ b/src/callbacks_step/time_series_dg2d.jl @@ -3,148 +3,151 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Creates cache for time series callback function create_cache_time_series(point_coordinates, mesh::TreeMesh{2}, dg, cache) - # Determine element ids for point coordinates - element_ids = get_elements_by_coordinates(point_coordinates, mesh, dg, cache) + # Determine element ids for point coordinates + element_ids = get_elements_by_coordinates(point_coordinates, mesh, dg, cache) - # Calculate & store Lagrange interpolation polynomials - interpolating_polynomials = calc_interpolating_polynomials(point_coordinates, element_ids, mesh, - dg, cache) + # Calculate & store Lagrange interpolation polynomials + interpolating_polynomials = calc_interpolating_polynomials(point_coordinates, + element_ids, mesh, + dg, cache) - time_series_cache = (; element_ids, interpolating_polynomials) + time_series_cache = (; element_ids, interpolating_polynomials) - return time_series_cache + return time_series_cache end - # Find element ids containing coordinates given as a matrix [ndims, npoints] -function get_elements_by_coordinates!(element_ids, coordinates, mesh::TreeMesh, dg, cache) - if length(element_ids) != size(coordinates, 2) - throw(DimensionMismatch("storage length for element ids does not match the number of coordinates")) - end - - @unpack cell_ids = cache.elements - @unpack tree = mesh - - # Reset element ids - 0 indicates "not (yet) found" - element_ids .= 0 - found_elements = 0 - - # Iterate over all elements - for element in eachelement(dg, cache) - # Get cell id - cell_id = cell_ids[element] - - # Iterate over coordinates - for index in 1:length(element_ids) - # Skip coordinates for which an element has already been found - if element_ids[index] > 0 - continue - end - - # Construct point - x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) - - # Skip if point is not in cell - if !is_point_in_cell(tree, x, cell_id) - continue - end - - # Otherwise point is in cell and thus in element - element_ids[index] = element - found_elements += 1 +function get_elements_by_coordinates!(element_ids, coordinates, mesh::TreeMesh, dg, + cache) + if length(element_ids) != size(coordinates, 2) + throw(DimensionMismatch("storage length for element ids does not match the number of coordinates")) end - # Exit loop if all elements have already been found - if found_elements == length(element_ids) - break + @unpack cell_ids = cache.elements + @unpack tree = mesh + + # Reset element ids - 0 indicates "not (yet) found" + element_ids .= 0 + found_elements = 0 + + # Iterate over all elements + for element in eachelement(dg, cache) + # Get cell id + cell_id = cell_ids[element] + + # Iterate over coordinates + for index in 1:length(element_ids) + # Skip coordinates for which an element has already been found + if element_ids[index] > 0 + continue + end + + # Construct point + x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) + + # Skip if point is not in cell + if !is_point_in_cell(tree, x, cell_id) + continue + end + + # Otherwise point is in cell and thus in element + element_ids[index] = element + found_elements += 1 + end + + # Exit loop if all elements have already been found + if found_elements == length(element_ids) + break + end end - end - return element_ids + return element_ids end - function get_elements_by_coordinates(coordinates, mesh, dg, cache) - element_ids = Vector{Int}(undef, size(coordinates, 2)) - get_elements_by_coordinates!(element_ids, coordinates, mesh, dg, cache) + element_ids = Vector{Int}(undef, size(coordinates, 2)) + get_elements_by_coordinates!(element_ids, 
coordinates, mesh, dg, cache) - return element_ids + return element_ids end - # Calculate the interpolating polynomials to extract data at the given coordinates # The coordinates are known to be located in the respective element in `element_ids` -function calc_interpolating_polynomials!(interpolating_polynomials, coordinates, element_ids, +function calc_interpolating_polynomials!(interpolating_polynomials, coordinates, + element_ids, mesh::TreeMesh, dg::DGSEM, cache) - @unpack tree = mesh - @unpack nodes = dg.basis + @unpack tree = mesh + @unpack nodes = dg.basis - wbary = barycentric_weights(nodes) + wbary = barycentric_weights(nodes) - for index in 1:length(element_ids) - # Construct point - x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) - - # Convert to unit coordinates - cell_id = cache.elements.cell_ids[element_ids[index]] - cell_coordinates_ = cell_coordinates(tree, cell_id) - cell_length = length_at_cell(tree, cell_id) - unit_coordinates = (x .- cell_coordinates_) * 2 / cell_length - - # Calculate interpolating polynomial for each dimension, making use of tensor product structure - for d in 1:ndims(mesh) - interpolating_polynomials[:, d, index] .= lagrange_interpolating_polynomials( - unit_coordinates[d], nodes, wbary) + for index in 1:length(element_ids) + # Construct point + x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) + + # Convert to unit coordinates + cell_id = cache.elements.cell_ids[element_ids[index]] + cell_coordinates_ = cell_coordinates(tree, cell_id) + cell_length = length_at_cell(tree, cell_id) + unit_coordinates = (x .- cell_coordinates_) * 2 / cell_length + + # Calculate interpolating polynomial for each dimension, making use of tensor product structure + for d in 1:ndims(mesh) + interpolating_polynomials[:, d, index] .= lagrange_interpolating_polynomials(unit_coordinates[d], + nodes, + wbary) + end end - end - return interpolating_polynomials + return interpolating_polynomials end +function calc_interpolating_polynomials(coordinates, element_ids, mesh::TreeMesh, dg, + cache) + interpolating_polynomials = Array{real(dg), 3}(undef, + nnodes(dg), ndims(mesh), + length(element_ids)) + calc_interpolating_polynomials!(interpolating_polynomials, coordinates, element_ids, + mesh, dg, + cache) -function calc_interpolating_polynomials(coordinates, element_ids, mesh::TreeMesh, dg, cache) - interpolating_polynomials = Array{real(dg), 3}(undef, - nnodes(dg), ndims(mesh), length(element_ids)) - calc_interpolating_polynomials!(interpolating_polynomials, coordinates, element_ids, mesh, dg, - cache) - - return interpolating_polynomials + return interpolating_polynomials end - # Record the solution variables at each given point -function record_state_at_points!(point_data, u, solution_variables, n_solution_variables, - mesh::TreeMesh{2}, equations, dg::DG, time_series_cache) - @unpack element_ids, interpolating_polynomials = time_series_cache - old_length = length(first(point_data)) - new_length = old_length + n_solution_variables - - # Loop over all points/elements that should be recorded - for index in 1:length(element_ids) - # Extract data array and element id - data = point_data[index] - element_id = element_ids[index] - - # Make room for new data to be recorded - resize!(data, new_length) - data[(old_length+1):new_length] .= zero(eltype(data)) - - # Loop over all nodes to compute their contribution to the interpolated values - for j in eachnode(dg), i in eachnode(dg) - u_node = solution_variables(get_node_vars(u, equations, dg, i, j, element_id), 
equations) - - for v in 1:length(u_node) - data[old_length + v] += (u_node[v] - * interpolating_polynomials[i, 1, index] - * interpolating_polynomials[j, 2, index]) - end +function record_state_at_points!(point_data, u, solution_variables, + n_solution_variables, + mesh::TreeMesh{2}, equations, dg::DG, + time_series_cache) + @unpack element_ids, interpolating_polynomials = time_series_cache + old_length = length(first(point_data)) + new_length = old_length + n_solution_variables + + # Loop over all points/elements that should be recorded + for index in 1:length(element_ids) + # Extract data array and element id + data = point_data[index] + element_id = element_ids[index] + + # Make room for new data to be recorded + resize!(data, new_length) + data[(old_length + 1):new_length] .= zero(eltype(data)) + + # Loop over all nodes to compute their contribution to the interpolated values + for j in eachnode(dg), i in eachnode(dg) + u_node = solution_variables(get_node_vars(u, equations, dg, i, j, + element_id), equations) + + for v in 1:length(u_node) + data[old_length + v] += (u_node[v] + * interpolating_polynomials[i, 1, index] + * interpolating_polynomials[j, 2, index]) + end + end end - end end - - end # @muladd diff --git a/src/callbacks_step/trivial.jl b/src/callbacks_step/trivial.jl index 5a16ab059a1..a55b7d85b13 100644 --- a/src/callbacks_step/trivial.jl +++ b/src/callbacks_step/trivial.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ TrivialCallback() @@ -12,26 +12,25 @@ A callback that does nothing. This can be useful to disable some callbacks easily via [`trixi_include`](@ref). """ function TrivialCallback() - DiscreteCallback(trivial_callback, trivial_callback, - save_positions=(false,false)) + DiscreteCallback(trivial_callback, trivial_callback, + save_positions = (false, false)) end trivial_callback(u, t, integrator) = false trivial_callback(integrator) = u_modified!(integrator, false) - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:typeof(trivial_callback)}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - print(io, "TrivialCallback()") + print(io, "TrivialCallback()") end - # This allows to set `summary_callback = TrivialCallback()` in elixirs to suppress # output, e.g. in `convergence_test`. -function (cb::DiscreteCallback{Condition,Affect!})(io::IO=stdout) where {Condition, Affect!<:typeof(trivial_callback)} - return nothing +function (cb::DiscreteCallback{Condition, Affect!})(io::IO = stdout) where {Condition, + Affect! <: + typeof(trivial_callback) + } + return nothing end - - end # @muladd diff --git a/src/callbacks_step/visualization.jl b/src/callbacks_step/visualization.jl index 6eb04608368..98c0126a302 100644 --- a/src/callbacks_step/visualization.jl +++ b/src/callbacks_step/visualization.jl @@ -3,51 +3,58 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - -mutable struct VisualizationCallback{SolutionVariables, VariableNames, PlotDataCreator, PlotCreator} - interval::Int - solution_variables::SolutionVariables - variable_names::VariableNames - show_mesh::Bool - plot_data_creator::PlotDataCreator - plot_creator::PlotCreator - plot_arguments::Dict{Symbol,Any} -end - - -function Base.show(io::IO, cb::DiscreteCallback{Condition,Affect!}) where {Condition, Affect!<:VisualizationCallback} - visualization_callback = cb.affect! 
- @unpack interval, plot_arguments, solution_variables, variable_names, show_mesh, plot_creator, plot_data_creator = visualization_callback - print(io, "VisualizationCallback(", - "interval=", interval, ", ", - "solution_variables=", solution_variables, ", ", - "variable_names=", variable_names, ", ", - "show_mesh=", show_mesh, ", ", - "plot_data_creator=", plot_data_creator, ", ", - "plot_creator=", plot_creator, ", ", - "plot_arguments=", plot_arguments, ")") +#! format: noindent + +mutable struct VisualizationCallback{SolutionVariables, VariableNames, PlotDataCreator, + PlotCreator} + interval::Int + solution_variables::SolutionVariables + variable_names::VariableNames + show_mesh::Bool + plot_data_creator::PlotDataCreator + plot_creator::PlotCreator + plot_arguments::Dict{Symbol, Any} end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{Condition,Affect!}) where {Condition, Affect!<:VisualizationCallback} - if get(io, :compact, false) - show(io, cb) - else +function Base.show(io::IO, + cb::DiscreteCallback{Condition, Affect!}) where {Condition, + Affect! <: + VisualizationCallback + } visualization_callback = cb.affect! - - setup = [ - "interval" => visualization_callback.interval, - "plot arguments" => visualization_callback.plot_arguments, - "solution variables" => visualization_callback.solution_variables, - "variable names" => visualization_callback.variable_names, - "show mesh" => visualization_callback.show_mesh, - "plot creator" => visualization_callback.plot_creator, - "plot data creator" => visualization_callback.plot_data_creator, - ] - summary_box(io, "VisualizationCallback", setup) - end + @unpack interval, plot_arguments, solution_variables, variable_names, show_mesh, plot_creator, plot_data_creator = visualization_callback + print(io, "VisualizationCallback(", + "interval=", interval, ", ", + "solution_variables=", solution_variables, ", ", + "variable_names=", variable_names, ", ", + "show_mesh=", show_mesh, ", ", + "plot_data_creator=", plot_data_creator, ", ", + "plot_creator=", plot_creator, ", ", + "plot_arguments=", plot_arguments, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{Condition, Affect!}) where {Condition, + Affect! <: + VisualizationCallback + } + if get(io, :compact, false) + show(io, cb) + else + visualization_callback = cb.affect! + + setup = [ + "interval" => visualization_callback.interval, + "plot arguments" => visualization_callback.plot_arguments, + "solution variables" => visualization_callback.solution_variables, + "variable names" => visualization_callback.variable_names, + "show mesh" => visualization_callback.show_mesh, + "plot creator" => visualization_callback.plot_creator, + "plot data creator" => visualization_callback.plot_data_creator, + ] + summary_box(io, "VisualizationCallback", setup) + end +end """ VisualizationCallback(; interval=0, @@ -75,92 +82,90 @@ To customize the generated figure, `plot_data_creator` allows to use different p same interface as the default implementation [`show_plot`](@ref). All remaining keyword arguments are collected and passed as additional arguments to the plotting command. """ -function VisualizationCallback(; interval=0, - solution_variables=cons2prim, - variable_names=[], - show_mesh=false, - plot_data_creator=PlotData2D, - plot_creator=show_plot, - plot_arguments...) 
- mpi_isparallel() && error("this callback does not work in parallel yet") - - if variable_names isa String - variable_names = String[variable_names] - end - - visualization_callback = VisualizationCallback(interval, - solution_variables, variable_names, show_mesh, - plot_data_creator, plot_creator, - Dict{Symbol,Any}(plot_arguments)) - - # Warn users if they create a visualization callback without having loaded the Plots package - # - # Note: This warning is added for convenience, as Plots is the only "officially" supported - # visualization package right now. However, in general nothing prevents anyone from using - # other packages such as Makie, Gadfly etc., given that appropriate `plot_creator`s are - # passed. This is also the reason why the visualization callback is not included via - # Requires.jl only when Plots is present. - # In the future, we should update/remove this warning if other plotting packages are - # starting to be used. - if !(:Plots in names(@__MODULE__, all=true)) - @warn "Package `Plots` not loaded but required by `VisualizationCallback` to visualize results" - end - - DiscreteCallback(visualization_callback, visualization_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) +function VisualizationCallback(; interval = 0, + solution_variables = cons2prim, + variable_names = [], + show_mesh = false, + plot_data_creator = PlotData2D, + plot_creator = show_plot, + plot_arguments...) + mpi_isparallel() && error("this callback does not work in parallel yet") + + if variable_names isa String + variable_names = String[variable_names] + end + + visualization_callback = VisualizationCallback(interval, + solution_variables, variable_names, + show_mesh, + plot_data_creator, plot_creator, + Dict{Symbol, Any}(plot_arguments)) + + # Warn users if they create a visualization callback without having loaded the Plots package + # + # Note: This warning is added for convenience, as Plots is the only "officially" supported + # visualization package right now. However, in general nothing prevents anyone from using + # other packages such as Makie, Gadfly etc., given that appropriate `plot_creator`s are + # passed. This is also the reason why the visualization callback is not included via + # Requires.jl only when Plots is present. + # In the future, we should update/remove this warning if other plotting packages are + # starting to be used. + if !(:Plots in names(@__MODULE__, all = true)) + @warn "Package `Plots` not loaded but required by `VisualizationCallback` to visualize results" + end + + DiscreteCallback(visualization_callback, visualization_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: VisualizationCallback} + visualization_callback = cb.affect! -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:VisualizationCallback} - visualization_callback = cb.affect! - - visualization_callback(integrator) + visualization_callback(integrator) - return nothing + return nothing end - # this method is called to determine whether the callback should be activated function (visualization_callback::VisualizationCallback)(u, t, integrator) - @unpack interval = visualization_callback - - # With error-based step size control, some steps can be rejected. 
Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return interval > 0 && ( (integrator.stats.naccept % interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - isfinished(integrator)) + @unpack interval = visualization_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + return interval > 0 && ((integrator.stats.naccept % interval == 0 && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + isfinished(integrator)) end - # this method is called when the callback is activated function (visualization_callback::VisualizationCallback)(integrator) - u_ode = integrator.u - semi = integrator.p - @unpack plot_arguments, solution_variables, variable_names, show_mesh, plot_data_creator, plot_creator = visualization_callback - - # Extract plot data - plot_data = plot_data_creator(u_ode, semi, solution_variables=solution_variables) - - # If variable names were not specified, plot everything - if isempty(variable_names) - variable_names = String[keys(plot_data)...] - end - - # Create plot - plot_creator(plot_data, variable_names; - show_mesh=show_mesh, plot_arguments=plot_arguments, - time=integrator.t, timestep=integrator.stats.naccept) - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + u_ode = integrator.u + semi = integrator.p + @unpack plot_arguments, solution_variables, variable_names, show_mesh, plot_data_creator, plot_creator = visualization_callback + + # Extract plot data + plot_data = plot_data_creator(u_ode, semi, solution_variables = solution_variables) + + # If variable names were not specified, plot everything + if isempty(variable_names) + variable_names = String[keys(plot_data)...] + end + + # Create plot + plot_creator(plot_data, variable_names; + show_mesh = show_mesh, plot_arguments = plot_arguments, + time = integrator.t, timestep = integrator.stats.naccept) + + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - """ show_plot(plot_data, variable_names; show_mesh=true, plot_arguments=Dict{Symbol,Any}(), @@ -179,41 +184,40 @@ This function is the default `plot_creator` argument for the [`VisualizationCall See also: [`VisualizationCallback`](@ref), [`save_plot`](@ref) """ function show_plot(plot_data, variable_names; - show_mesh=true, plot_arguments=Dict{Symbol,Any}(), - time=nothing, timestep=nothing) - # Gather subplots - plots = [] - for v in variable_names - push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) - end - if show_mesh - push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) - end - - # Note, for the visualization callback to work for general equation systems - # this layout construction would need to use the if-logic below. - # Currently, there is no use case for this so it is left here as a note. 
- # - # Determine layout - # if length(plots) <= 3 - # cols = length(plots) - # rows = 1 - # else - # cols = ceil(Int, sqrt(length(plots))) - # rows = div(length(plots), cols, RoundUp) - # end - # layout = (rows, cols) - - # Determine layout - cols = ceil(Int, sqrt(length(plots))) - rows = div(length(plots), cols, RoundUp) - layout = (rows, cols) - - # Show plot - display(Plots.plot(plots..., layout=layout)) + show_mesh = true, plot_arguments = Dict{Symbol, Any}(), + time = nothing, timestep = nothing) + # Gather subplots + plots = [] + for v in variable_names + push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) + end + if show_mesh + push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) + end + + # Note, for the visualization callback to work for general equation systems + # this layout construction would need to use the if-logic below. + # Currently, there is no use case for this so it is left here as a note. + # + # Determine layout + # if length(plots) <= 3 + # cols = length(plots) + # rows = 1 + # else + # cols = ceil(Int, sqrt(length(plots))) + # rows = div(length(plots), cols, RoundUp) + # end + # layout = (rows, cols) + + # Determine layout + cols = ceil(Int, sqrt(length(plots))) + rows = div(length(plots), cols, RoundUp) + layout = (rows, cols) + + # Show plot + display(Plots.plot(plots..., layout = layout)) end - """ save_plot(plot_data, variable_names; show_mesh=true, plot_arguments=Dict{Symbol,Any}(), @@ -232,29 +236,27 @@ The `timestep` is used in the filename. `time` is currently unused by this funct See also: [`VisualizationCallback`](@ref), [`show_plot`](@ref) """ function save_plot(plot_data, variable_names; - show_mesh=true, plot_arguments=Dict{Symbol,Any}(), - time=nothing, timestep=nothing) - # Gather subplots - plots = [] - for v in variable_names - push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) - end - if show_mesh - push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) - end - - # Determine layout - cols = ceil(Int, sqrt(length(plots))) - rows = div(length(plots), cols, RoundUp) - layout = (rows, cols) - - # Create plot - Plots.plot(plots..., layout=layout) - - # Determine filename and save plot - filename = joinpath("out", @sprintf("solution_%06d.png", timestep)) - Plots.savefig(filename) + show_mesh = true, plot_arguments = Dict{Symbol, Any}(), + time = nothing, timestep = nothing) + # Gather subplots + plots = [] + for v in variable_names + push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) + end + if show_mesh + push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) + end + + # Determine layout + cols = ceil(Int, sqrt(length(plots))) + rows = div(length(plots), cols, RoundUp) + layout = (rows, cols) + + # Create plot + Plots.plot(plots..., layout = layout) + + # Determine filename and save plot + filename = joinpath("out", @sprintf("solution_%06d.png", timestep)) + Plots.savefig(filename) end - - end # @muladd diff --git a/src/equations/acoustic_perturbation_2d.jl b/src/equations/acoustic_perturbation_2d.jl index 9161de8da15..786630a14c7 100644 --- a/src/equations/acoustic_perturbation_2d.jl +++ b/src/equations/acoustic_perturbation_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent @doc raw""" AcousticPerturbationEquations2D(v_mean_global, c_mean_global, rho_mean_global) @@ -47,41 +47,51 @@ The equations are based on the APE-4 system introduced in the following paper: Acoustic perturbation equations based on flow decomposition via source filtering [DOI: 10.1016/S0021-9991(03)00168-2](https://doi.org/10.1016/S0021-9991(03)00168-2) """ -struct AcousticPerturbationEquations2D{RealT<:Real} <: AbstractAcousticPerturbationEquations{2, 7} - v_mean_global::SVector{2, RealT} - c_mean_global::RealT - rho_mean_global::RealT +struct AcousticPerturbationEquations2D{RealT <: Real} <: + AbstractAcousticPerturbationEquations{2, 7} + v_mean_global::SVector{2, RealT} + c_mean_global::RealT + rho_mean_global::RealT end -function AcousticPerturbationEquations2D(v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, +function AcousticPerturbationEquations2D(v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) - return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) + return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end -function AcousticPerturbationEquations2D(; v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, +function AcousticPerturbationEquations2D(; v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) - return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) + return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end - -varnames(::typeof(cons2cons), ::AcousticPerturbationEquations2D) = ("v1_prime", "v2_prime", "p_prime_scaled", - "v1_mean", "v2_mean", "c_mean", "rho_mean") -varnames(::typeof(cons2prim), ::AcousticPerturbationEquations2D) = ("v1_prime", "v2_prime", "p_prime", - "v1_mean", "v2_mean", "c_mean", "rho_mean") - +function varnames(::typeof(cons2cons), ::AcousticPerturbationEquations2D) + ("v1_prime", "v2_prime", "p_prime_scaled", + "v1_mean", "v2_mean", "c_mean", "rho_mean") +end +function varnames(::typeof(cons2prim), ::AcousticPerturbationEquations2D) + ("v1_prime", "v2_prime", "p_prime", + "v1_mean", "v2_mean", "c_mean", "rho_mean") +end # Convenience functions for retrieving state variables and mean variables function cons2state(u, equations::AcousticPerturbationEquations2D) - return SVector(u[1], u[2], u[3]) + return SVector(u[1], u[2], u[3]) end function cons2mean(u, equations::AcousticPerturbationEquations2D) - return SVector(u[4], u[5], u[6], u[7]) + return SVector(u[4], u[5], u[6], u[7]) end -varnames(::typeof(cons2state), ::AcousticPerturbationEquations2D) = ("v1_prime", "v2_prime", "p_prime_scaled") -varnames(::typeof(cons2mean), ::AcousticPerturbationEquations2D) = ("v1_mean", "v2_mean", "c_mean", "rho_mean") - +function varnames(::typeof(cons2state), ::AcousticPerturbationEquations2D) + ("v1_prime", "v2_prime", "p_prime_scaled") +end +function varnames(::typeof(cons2mean), ::AcousticPerturbationEquations2D) + ("v1_mean", "v2_mean", "c_mean", "rho_mean") +end """ global_mean_vars(equations::AcousticPerturbationEquations2D) @@ -90,11 +100,11 @@ Returns the global mean variables stored in `equations`. This makes it easier to define flexible initial conditions for problems with constant mean flow. 
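A minimal usage sketch for the constructor and accessor functions above; the mean-flow values are purely illustrative, and the helpers are qualified with `Trixi.` since their export status is not visible in this diff:

```julia
using Trixi
using StaticArrays: SVector

# Mean flow chosen only for illustration
equations = AcousticPerturbationEquations2D(v_mean_global = (0.5, 0.25),
                                            c_mean_global = 1.0,
                                            rho_mean_global = 1.0)

# A full state vector: three perturbation variables followed by four mean variables
u = SVector(0.1, -0.1, 0.2, Trixi.global_mean_vars(equations)...)

Trixi.cons2state(u, equations) # -> SVector(0.1, -0.1, 0.2)
Trixi.cons2mean(u, equations)  # -> SVector(0.5, 0.25, 1.0, 1.0)
```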
""" function global_mean_vars(equations::AcousticPerturbationEquations2D) - return equations.v_mean_global[1], equations.v_mean_global[2], equations.c_mean_global, - equations.rho_mean_global + return equations.v_mean_global[1], equations.v_mean_global[2], + equations.c_mean_global, + equations.rho_mean_global end - """ initial_condition_constant(x, t, equations::AcousticPerturbationEquations2D) @@ -102,36 +112,36 @@ A constant initial condition where the state variables are zero and the mean flo Uses the global mean values from `equations`. """ function initial_condition_constant(x, t, equations::AcousticPerturbationEquations2D) - v1_prime = 0.0 - v2_prime = 0.0 - p_prime_scaled = 0.0 + v1_prime = 0.0 + v2_prime = 0.0 + p_prime_scaled = 0.0 - return SVector(v1_prime, v2_prime, p_prime_scaled, global_mean_vars(equations)...) + return SVector(v1_prime, v2_prime, p_prime_scaled, global_mean_vars(equations)...) end - """ initial_condition_convergence_test(x, t, equations::AcousticPerturbationEquations2D) A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref). Uses the global mean values from `equations`. """ -function initial_condition_convergence_test(x, t, equations::AcousticPerturbationEquations2D) - c = 2.0 - A = 0.2 - L = 2.0 - f = 2.0 / L - a = 1.0 - omega = 2 * pi * f - init = c + A * sin(omega * (x[1] + x[2] - a*t)) - - v1_prime = init - v2_prime = init - p_prime = init^2 - - prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) - - return prim2cons(prim, equations) +function initial_condition_convergence_test(x, t, + equations::AcousticPerturbationEquations2D) + c = 2.0 + A = 0.2 + L = 2.0 + f = 2.0 / L + a = 1.0 + omega = 2 * pi * f + init = c + A * sin(omega * (x[1] + x[2] - a * t)) + + v1_prime = init + v2_prime = init + p_prime = init^2 + + prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) + + return prim2cons(prim, equations) end """ @@ -140,73 +150,75 @@ end Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). """ -function source_terms_convergence_test(u, x, t, equations::AcousticPerturbationEquations2D) - v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) +function source_terms_convergence_test(u, x, t, + equations::AcousticPerturbationEquations2D) + v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) - c = 2.0 - A = 0.2 - L = 2.0 - f = 2.0 / L - a = 1.0 - omega = 2 * pi * f + c = 2.0 + A = 0.2 + L = 2.0 + f = 2.0 / L + a = 1.0 + omega = 2 * pi * f - si, co = sincos(omega * (x[1] + x[2] - a * t)) - tmp = v1_mean + v2_mean - a + si, co = sincos(omega * (x[1] + x[2] - a * t)) + tmp = v1_mean + v2_mean - a - du1 = du2 = A * omega * co * (2 * c/rho_mean + tmp + 2/rho_mean * A * si) - du3 = A * omega * co * (2 * c_mean^2 * rho_mean + 2 * c * tmp + 2 * A * tmp * si) / c_mean^2 + du1 = du2 = A * omega * co * (2 * c / rho_mean + tmp + 2 / rho_mean * A * si) + du3 = A * omega * co * (2 * c_mean^2 * rho_mean + 2 * c * tmp + 2 * A * tmp * si) / + c_mean^2 - du4 = du5 = du6 = du7 = 0.0 + du4 = du5 = du6 = du7 = 0.0 - return SVector(du1, du2, du3, du4, du5, du6, du7) + return SVector(du1, du2, du3, du4, du5, du6, du7) end - """ initial_condition_gauss(x, t, equations::AcousticPerturbationEquations2D) A Gaussian pulse in a constant mean flow. Uses the global mean values from `equations`. 
""" function initial_condition_gauss(x, t, equations::AcousticPerturbationEquations2D) - v1_prime = 0.0 - v2_prime = 0.0 - p_prime = exp(-4*(x[1]^2 + x[2]^2)) + v1_prime = 0.0 + v2_prime = 0.0 + p_prime = exp(-4 * (x[1]^2 + x[2]^2)) - prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) + prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) - return prim2cons(prim, equations) + return prim2cons(prim, equations) end - """ boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::AcousticPerturbationEquations2D) Boundary conditions for a solid wall. """ -function boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, +function boundary_condition_wall(u_inner, orientation, direction, x, t, + surface_flux_function, equations::AcousticPerturbationEquations2D) - # Boundary state is equal to the inner state except for the perturbed velocity. For boundaries - # in the -x/+x direction, we multiply the perturbed velocity in the x direction by -1. - # Similarly, for boundaries in the -y/+y direction, we multiply the perturbed velocity in the - # y direction by -1 - if direction in (1, 2) # x direction - u_boundary = SVector(-u_inner[1], u_inner[2], u_inner[3], cons2mean(u_inner, equations)...) - else # y direction - u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], cons2mean(u_inner, equations)...) - end - - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end - - return flux + # Boundary state is equal to the inner state except for the perturbed velocity. For boundaries + # in the -x/+x direction, we multiply the perturbed velocity in the x direction by -1. + # Similarly, for boundaries in the -y/+y direction, we multiply the perturbed velocity in the + # y direction by -1 + if direction in (1, 2) # x direction + u_boundary = SVector(-u_inner[1], u_inner[2], u_inner[3], + cons2mean(u_inner, equations)...) + else # y direction + u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], + cons2mean(u_inner, equations)...) 
+ end + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + + return flux end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::AcousticPerturbationEquations2D) @@ -219,148 +231,152 @@ Further details are available in the paper: [DOI: 10.2514/1.J050333](https://doi.org/10.2514/1.J050333) """ function boundary_condition_slip_wall(u_inner, normal_direction::AbstractVector, x, t, - surface_flux_function, equations::AcousticPerturbationEquations2D) - # normalize the outward pointing direction - normal = normal_direction / norm(normal_direction) + surface_flux_function, + equations::AcousticPerturbationEquations2D) + # normalize the outward pointing direction + normal = normal_direction / norm(normal_direction) - # compute the normal perturbed velocity - u_normal = normal[1] * u_inner[1] + normal[2] * u_inner[2] + # compute the normal perturbed velocity + u_normal = normal[1] * u_inner[1] + normal[2] * u_inner[2] - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1] - 2.0 * u_normal * normal[1], - u_inner[2] - 2.0 * u_normal * normal[2], - u_inner[3], cons2mean(u_inner, equations)...) + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1] - 2.0 * u_normal * normal[1], + u_inner[2] - 2.0 * u_normal * normal[2], + u_inner[3], cons2mean(u_inner, equations)...) - # calculate the boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + # calculate the boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + return flux end - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equations::AcousticPerturbationEquations2D) - v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) - v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) - - # Calculate flux for conservative state variables - if orientation == 1 - f1 = v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean - f2 = zero(eltype(u)) - f3 = rho_mean * v1_prime + v1_mean * p_prime_scaled - else - f1 = zero(eltype(u)) - f2 = v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean - f3 = rho_mean * v2_prime + v2_mean * p_prime_scaled - end - - # The rest of the state variables are actually variable coefficients, hence the flux should be - # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 - # for details. 
- f4 = f5 = f6 = f7 = zero(eltype(u)) - - return SVector(f1, f2, f3, f4, f5, f6, f7) +@inline function flux(u, orientation::Integer, + equations::AcousticPerturbationEquations2D) + v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) + v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) + + # Calculate flux for conservative state variables + if orientation == 1 + f1 = v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean + f2 = zero(eltype(u)) + f3 = rho_mean * v1_prime + v1_mean * p_prime_scaled + else + f1 = zero(eltype(u)) + f2 = v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean + f3 = rho_mean * v2_prime + v2_mean * p_prime_scaled + end + + # The rest of the state variables are actually variable coefficients, hence the flux should be + # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 + # for details. + f4 = f5 = f6 = f7 = zero(eltype(u)) + + return SVector(f1, f2, f3, f4, f5, f6, f7) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::AcousticPerturbationEquations2D) - # Calculate v = v_prime + v_mean - v_prime_ll = u_ll[orientation] - v_prime_rr = u_rr[orientation] - v_mean_ll = u_ll[orientation + 3] - v_mean_rr = u_rr[orientation + 3] +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::AcousticPerturbationEquations2D) + # Calculate v = v_prime + v_mean + v_prime_ll = u_ll[orientation] + v_prime_rr = u_rr[orientation] + v_mean_ll = u_ll[orientation + 3] + v_mean_rr = u_rr[orientation + 3] - v_ll = v_prime_ll + v_mean_ll - v_rr = v_prime_rr + v_mean_rr + v_ll = v_prime_ll + v_mean_ll + v_rr = v_prime_rr + v_mean_rr - c_mean_ll = u_ll[6] - c_mean_rr = u_rr[6] + c_mean_ll = u_ll[6] + c_mean_rr = u_rr[6] - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_mean_ll, c_mean_rr) + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_mean_ll, c_mean_rr) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::AcousticPerturbationEquations2D) - v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) - v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) - - f1 = normal_direction[1] * (v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean) - f2 = normal_direction[2] * (v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean) - f3 = ( normal_direction[1] * (rho_mean * v1_prime + v1_mean * p_prime_scaled) - + normal_direction[2] * (rho_mean * v2_prime + v2_mean * p_prime_scaled) ) - - # The rest of the state variables are actually variable coefficients, hence the flux should be - # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 - # for details. 
- f4 = f5 = f6 = f7 = zero(eltype(u)) - - return SVector(f1, f2, f3, f4, f5, f6, f7) +@inline function flux(u, normal_direction::AbstractVector, + equations::AcousticPerturbationEquations2D) + v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) + v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) + + f1 = normal_direction[1] * (v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean) + f2 = normal_direction[2] * (v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean) + f3 = (normal_direction[1] * (rho_mean * v1_prime + v1_mean * p_prime_scaled) + + + normal_direction[2] * (rho_mean * v2_prime + v2_mean * p_prime_scaled)) + + # The rest of the state variables are actually variable coefficients, hence the flux should be + # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 + # for details. + f4 = f5 = f6 = f7 = zero(eltype(u)) + + return SVector(f1, f2, f3, f4, f5, f6, f7) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::AcousticPerturbationEquations2D) - # Calculate v = v_prime + v_mean - v_prime_ll = normal_direction[1]*u_ll[1] + normal_direction[2]*u_ll[2] - v_prime_rr = normal_direction[1]*u_rr[1] + normal_direction[2]*u_rr[2] - v_mean_ll = normal_direction[1]*u_ll[4] + normal_direction[2]*u_ll[5] - v_mean_rr = normal_direction[1]*u_rr[4] + normal_direction[2]*u_rr[5] - - v_ll = v_prime_ll + v_mean_ll - v_rr = v_prime_rr + v_mean_rr - - c_mean_ll = u_ll[6] - c_mean_rr = u_rr[6] - - # The v_normals are already scaled by the norm - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_mean_ll, c_mean_rr) * norm(normal_direction) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::AcousticPerturbationEquations2D) + # Calculate v = v_prime + v_mean + v_prime_ll = normal_direction[1] * u_ll[1] + normal_direction[2] * u_ll[2] + v_prime_rr = normal_direction[1] * u_rr[1] + normal_direction[2] * u_rr[2] + v_mean_ll = normal_direction[1] * u_ll[4] + normal_direction[2] * u_ll[5] + v_mean_rr = normal_direction[1] * u_rr[4] + normal_direction[2] * u_rr[5] + + v_ll = v_prime_ll + v_mean_ll + v_rr = v_prime_rr + v_mean_rr + + c_mean_ll = u_ll[6] + c_mean_rr = u_rr[6] + + # The v_normals are already scaled by the norm + λ_max = max(abs(v_ll), abs(v_rr)) + + max(c_mean_ll, c_mean_rr) * norm(normal_direction) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the mean values -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, equations::AcousticPerturbationEquations2D) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - z = zero(eltype(u_ll)) - return SVector(diss[1], diss[2], diss[3], z, z, z, z) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + z = zero(eltype(u_ll)) + return SVector(diss[1], diss[2], diss[3], z, z, z, z) end - @inline have_constant_speed(::AcousticPerturbationEquations2D) = False() @inline function max_abs_speeds(u, equations::AcousticPerturbationEquations2D) - v1_mean = u[4] - v2_mean = u[5] - c_mean = u[6] + v1_mean = u[4] + v2_mean = u[5] + c_mean = u[6] - return abs(v1_mean) 
+ c_mean, abs(v2_mean) + c_mean + return abs(v1_mean) + c_mean, abs(v2_mean) + c_mean end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::AcousticPerturbationEquations2D) - p_prime_scaled = u[3] - c_mean = u[6] - p_prime = p_prime_scaled * c_mean^2 + p_prime_scaled = u[3] + c_mean = u[6] + p_prime = p_prime_scaled * c_mean^2 - return SVector(u[1], u[2], p_prime, u[4], u[5], u[6], u[7]) + return SVector(u[1], u[2], p_prime, u[4], u[5], u[6], u[7]) end # Convert primitive variables to conservative @inline function prim2cons(u, equations::AcousticPerturbationEquations2D) - p_prime = u[3] - c_mean = u[6] - p_prime_scaled = p_prime / c_mean^2 + p_prime = u[3] + c_mean = u[6] + p_prime_scaled = p_prime / c_mean^2 - return SVector(u[1], u[2], p_prime_scaled, u[4], u[5], u[6], u[7]) + return SVector(u[1], u[2], p_prime_scaled, u[4], u[5], u[6], u[7]) end # Convert conservative variables to entropy variables @inline cons2entropy(u, equations::AcousticPerturbationEquations2D) = u - - end # @muladd diff --git a/src/equations/compressible_euler_1d.jl b/src/equations/compressible_euler_1d.jl index f1bb18070e1..f484f26a588 100644 --- a/src/equations/compressible_euler_1d.jl +++ b/src/equations/compressible_euler_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerEquations1D(gamma) @@ -31,34 +31,34 @@ p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho v_1^2 \right) ``` the pressure. """ -struct CompressibleEulerEquations1D{RealT<:Real} <: AbstractCompressibleEulerEquations{1, 3} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function CompressibleEulerEquations1D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct CompressibleEulerEquations1D{RealT <: Real} <: + AbstractCompressibleEulerEquations{1, 3} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function CompressibleEulerEquations1D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end - -varnames(::typeof(cons2cons), ::CompressibleEulerEquations1D) = ("rho", "rho_v1", "rho_e") +function varnames(::typeof(cons2cons), ::CompressibleEulerEquations1D) + ("rho", "rho_v1", "rho_e") +end varnames(::typeof(cons2prim), ::CompressibleEulerEquations1D) = ("rho", "v1", "p") - """ initial_condition_constant(x, t, equations::CompressibleEulerEquations1D) A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::CompressibleEulerEquations1D) - rho = 1.0 - rho_v1 = 0.1 - rho_e = 10.0 - return SVector(rho, rho_v1, rho_e) + rho = 1.0 + rho_v1 = 0.1 + rho_e = 10.0 + return SVector(rho, rho_v1, rho_e) end - """ initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations1D) @@ -66,19 +66,20 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). 
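For context on `CompressibleEulerEquations1D` above: the inner constructor stores ``1/(\gamma - 1)`` alongside ``\gamma`` so that repeated divisions by ``\gamma - 1`` can be written as multiplications. A short sketch, where ``\gamma = 1.4`` is only an example value:

```julia
using Trixi
using StaticArrays: SVector

equations = CompressibleEulerEquations1D(1.4)
equations.inv_gamma_minus_one       # 2.5 == inv(1.4 - 1)

# Conversions between primitive and conservative variables are mutually inverse
prim = SVector(1.0, 0.1, 10.0)      # rho, v1, p
u = prim2cons(prim, equations)
cons2prim(u, equations) ≈ prim      # true up to floating-point roundoff
```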
""" -function initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations1D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - ini = c + A * sin(ω * (x[1] - t)) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerEquations1D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + ini = c + A * sin(ω * (x[1] - t)) - rho = ini - rho_v1 = ini - rho_e = ini^2 + rho = ini + rho_v1 = ini + rho_e = ini^2 - return SVector(rho, rho_v1, rho_e) + return SVector(rho, rho_v1, rho_e) end """ @@ -88,32 +89,32 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerEquations1D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - γ = equations.gamma +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerEquations1D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + γ = equations.gamma - x1, = x + x1, = x - si, co = sincos(ω * (x1 - t)) - rho = c + A * si - rho_x = ω * A * co + si, co = sincos(ω * (x1 - t)) + rho = c + A * si + rho_x = ω * A * co - # Note that d/dt rho = -d/dx rho. - # This yields du2 = du3 = d/dx p (derivative of pressure). - # Other terms vanish because of v = 1. - du1 = zero(eltype(u)) - du2 = rho_x * (2 * rho - 0.5) * (γ - 1) - du3 = du2 + # Note that d/dt rho = -d/dx rho. + # This yields du2 = du3 = d/dx p (derivative of pressure). + # Other terms vanish because of v = 1. + du1 = zero(eltype(u)) + du2 = rho_x * (2 * rho - 0.5) * (γ - 1) + du3 = du2 - return SVector(du1, du2, du3) + return SVector(du1, du2, du3) end - """ initial_condition_density_wave(x, t, equations::CompressibleEulerEquations1D) @@ -129,15 +130,14 @@ with the following parameters - polydeg = 5 """ function initial_condition_density_wave(x, t, equations::CompressibleEulerEquations1D) - v1 = 0.1 - rho = 1 + 0.98 * sinpi(2 * (x[1] - t * v1)) - rho_v1 = rho * v1 - p = 20 - rho_e = p / (equations.gamma - 1) + 1/2 * rho * v1^2 - return SVector(rho, rho_v1, rho_e) + v1 = 0.1 + rho = 1 + 0.98 * sinpi(2 * (x[1] - t * v1)) + rho_v1 = rho * v1 + p = 20 + rho_e = p / (equations.gamma - 1) + 1 / 2 * rho * v1^2 + return SVector(rho, rho_v1, rho_e) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations1D) @@ -146,27 +146,27 @@ A weak blast wave taken from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations1D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Set up polar coordinates - inicenter = SVector(0.0) - x_norm = x[1] - inicenter[1] - r = abs(x_norm) - # The following code is equivalent to - # phi = atan(0.0, x_norm) - # cos_phi = cos(phi) - # in 1D but faster - cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, p), equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerEquations1D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 
6.3) + # Set up polar coordinates + inicenter = SVector(0.0) + x_norm = x[1] - inicenter[1] + r = abs(x_norm) + # The following code is equivalent to + # phi = atan(0.0, x_norm) + # cos_phi = cos(phi) + # in 1D but faster + cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, p), equations) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations1D) @@ -180,37 +180,36 @@ with self-gravity from spatial dimension. Thus, [`source_terms_eoc_test_coupled_euler_gravity`](@ref) is not present there. """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations1D) - # OBS! this assumes that γ = 2 other manufactured source terms are incorrect - if equations.gamma != 2.0 - error("adiabatic constant must be 2 for the coupling convergence test") - end - c = 2.0 - A = 0.1 - ini = c + A * sinpi(x[1] - t) - G = 1.0 # gravitational constant +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::CompressibleEulerEquations1D) + # OBS! this assumes that γ = 2 other manufactured source terms are incorrect + if equations.gamma != 2.0 + error("adiabatic constant must be 2 for the coupling convergence test") + end + c = 2.0 + A = 0.1 + ini = c + A * sinpi(x[1] - t) + G = 1.0 # gravitational constant - rho = ini - v1 = 1.0 - p = 2 * ini^2 * G / pi # * 2 / ndims, but ndims==1 here + rho = ini + v1 = 1.0 + p = 2 * ini^2 * G / pi # * 2 / ndims, but ndims==1 here - return prim2cons(SVector(rho, v1, p), equations) + return prim2cons(SVector(rho, v1, p), equations) end - # Calculate 1D flux for a single point @inline function flux(u, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # Ignore orientation since it is always "1" in 1D - f1 = rho_v1 - f2 = rho_v1 * v1 + p - f3 = (rho_e + p) * v1 - return SVector(f1, f2, f3) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + # Ignore orientation since it is always "1" in 1D + f1 = rho_v1 + f2 = rho_v1 * v1 + p + f3 = (rho_e + p) * v1 + return SVector(f1, f2, f3) end - """ flux_shima_etal(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -226,28 +225,28 @@ The modification is in the energy flux to guarantee pressure equilibrium and was compressible flows [DOI: 10.1016/j.jcp.2020.110060](https://doi.org/10.1016/j.jcp.2020.110060) """ -@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) +@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left and right state + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - p_avg = 1/2 * ( p_ll + p_rr) - kin_avg = 1/2 * (v1_ll * v1_rr) + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + kin_avg = 1 / 2 * (v1_ll * v1_rr) - # Calculate fluxes - # Ignore 
orientation since it is always "1" in 1D - pv1_avg = 1/2 * (p_ll*v1_rr + p_rr*v1_ll) - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = p_avg*v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + # Calculate fluxes + # Ignore orientation since it is always "1" in 1D + pv1_avg = 1 / 2 * (p_ll * v1_rr + p_rr * v1_ll) + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - """ flux_kennedy_gruber(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -257,28 +256,28 @@ Kinetic energy preserving two-point flux by Navier-Stokes equations for a compressible fluid [DOI: 10.1016/j.jcp.2007.09.020](https://doi.org/10.1016/j.jcp.2007.09.020) """ -@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) +@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - p_avg = 1/2 * ( p_ll + p_rr) - e_avg = 1/2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + e_avg = 1 / 2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - # Ignore orientation since it is always "1" in 1D - f1 = rho_avg * v1_avg - f2 = rho_avg * v1_avg * v1_avg + p_avg - f3 = (rho_avg * e_avg + p_avg) * v1_avg + # Ignore orientation since it is always "1" in 1D + f1 = rho_avg * v1_avg + f2 = rho_avg * v1_avg * v1_avg + p_avg + f3 = (rho_avg * e_avg + p_avg) * v1_avg - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -288,34 +287,35 @@ Entropy conserving two-point flux by for Compressible Euler and Navier-Stokes Equations [DOI: 10.4208/cicp.170712.010313a](https://doi.org/10.4208/cicp.170712.010313a) """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - specific_kin_ll = 0.5 * (v1_ll^2) - specific_kin_rr = 0.5 * (v1_rr^2) - - # Compute the necessary mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - p_mean = 0.5 * rho_avg / beta_avg - velocity_square_avg = specific_kin_ll + specific_kin_rr - - # Calculate fluxes - # Ignore orientation since it is always "1" in 1D - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_mean - f3 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+f2*v1_avg - - return SVector(f1, f2, f3) +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left 
and right state + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + specific_kin_ll = 0.5 * (v1_ll^2) + specific_kin_rr = 0.5 * (v1_rr^2) + + # Compute the necessary mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + p_mean = 0.5 * rho_avg / beta_avg + velocity_square_avg = specific_kin_ll + specific_kin_rr + + # Calculate fluxes + # Ignore orientation since it is always "1" in 1D + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + f3 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + + return SVector(f1, f2, f3) end - """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerEquations1D) @@ -330,36 +330,38 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr) - - # Calculate fluxes - # Ignore orientation since it is always "1" in 1D - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - - return SVector(f1, f2, f3) -end - -@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations1D) - return normal_direction[1] * flux_ranocha(u_ll, u_rr, 1, equations) +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left and right state + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr) + + # Calculate fluxes + # Ignore orientation since it is always "1" in 1D + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + return SVector(f1, f2, f3) +end + +@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations1D) + return normal_direction[1] * flux_ranocha(u_ll, u_rr, 1, equations) end - """ splitting_steger_warming(u, orientation::Integer, 
equations::CompressibleEulerEquations1D) @@ -386,43 +388,43 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. """ @inline function splitting_steger_warming(u, orientation::Integer, equations::CompressibleEulerEquations1D) - fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) - fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) + fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_steger_warming(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - a = sqrt(equations.gamma * p / rho) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + a = sqrt(equations.gamma * p / rho) - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) - f3p = rho_2gamma * (alpha_p * 0.5 * v1^2 + a * v1 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) + f3p = rho_2gamma * (alpha_p * 0.5 * v1^2 + a * v1 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - return SVector(f1p, f2p, f3p) + return SVector(f1p, f2p, f3p) end @inline function splitting_steger_warming(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - a = sqrt(equations.gamma * p / rho) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + a = sqrt(equations.gamma * p / rho) lambda1 = v1 lambda2 = v1 + a @@ -438,12 +440,11 @@ end f1m = rho_2gamma * alpha_m f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) f3m = rho_2gamma * (alpha_m * 0.5 * v1^2 + a * v1 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - return SVector(f1m, f2m, f3m) + return SVector(f1m, f2m, f3m) end - """ splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations1D) @@ -480,56 +481,55 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
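Because `splitting_steger_warming` returns the pair ``(f^-, f^+)``, a quick consistency check is that the two parts recombine to the full physical flux. A sketch with an arbitrary admissible state (values chosen only for illustration):

```julia
using Trixi
using StaticArrays: SVector

equations = CompressibleEulerEquations1D(1.4)
u = prim2cons(SVector(1.0, 0.1, 1.0), equations)  # rho, v1, p (arbitrary)

fm, fp = splitting_steger_warming(u, 1, equations)
fm + fp ≈ flux(u, 1, equations)  # f = f⁻ + f⁺ recovers the full flux
```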
""" @inline function splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations1D) - fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) - fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) + fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_vanleer_haenel(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - p_plus = 0.5 * (1 + equations.gamma * M) * p + p_plus = 0.5 * (1 + equations.gamma * M) * p - f1p = 0.25 * rho * a * (M + 1)^2 - f2p = f1p * v1 + p_plus - f3p = f1p * H + f1p = 0.25 * rho * a * (M + 1)^2 + f2p = f1p * v1 + p_plus + f3p = f1p * H - return SVector(f1p, f2p, f3p) + return SVector(f1p, f2p, f3p) end @inline function splitting_vanleer_haenel(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - p_minus = 0.5 * (1 - equations.gamma * M) * p + p_minus = 0.5 * (1 - equations.gamma * M) * p - f1m= -0.25 * rho * a * (M - 1)^2 - f2m = f1m * v1 + p_minus - f3m = f1m * H + f1m = -0.25 * rho * a * (M - 1)^2 + f2m = f1m * v1 + p_minus + f3m = f1m * H - return SVector(f1m, f2m, f3m) + return SVector(f1m, f2m, f3m) end - # TODO: FD # This splitting is interesting because it can handle the "el diablo" wave # for long time runs. Computing the eigenvalues of the operator we see @@ -568,99 +568,98 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
[DOI: 10.2514/6.1991-1566](https://doi.org/10.2514/6.1991-1566) """ @inline function splitting_coirier_vanleer(u, orientation::Integer, - equations::CompressibleEulerEquations1D) - fm = splitting_coirier_vanleer(u, Val{:minus}(), orientation, equations) - fp = splitting_coirier_vanleer(u, Val{:plus}(), orientation, equations) - return fm, fp + equations::CompressibleEulerEquations1D) + fm = splitting_coirier_vanleer(u, Val{:minus}(), orientation, equations) + fp = splitting_coirier_vanleer(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_coirier_vanleer(u, ::Val{:plus}, orientation::Integer, - equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + equations::CompressibleEulerEquations1D) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - P = 2 - mu = 1.0 - nu = 0.75 - omega = 2.0 # adjusted from suggested value of 1.5 + P = 2 + mu = 1.0 + nu = 0.75 + omega = 2.0 # adjusted from suggested value of 1.5 - p_plus = 0.25 * ((M + 1)^2 * (2 - M) - nu * M * (M^2 - 1)^P) * p + p_plus = 0.25 * ((M + 1)^2 * (2 - M) - nu * M * (M^2 - 1)^P) * p - f1p = 0.25 * rho * a * ((M + 1)^2 - mu * (M^2 - 1)^P) - f2p = f1p * v1 + p_plus - f3p = f1p * H - omega * rho * a^3 * M^2 * (M^2 - 1)^2 + f1p = 0.25 * rho * a * ((M + 1)^2 - mu * (M^2 - 1)^P) + f2p = f1p * v1 + p_plus + f3p = f1p * H - omega * rho * a^3 * M^2 * (M^2 - 1)^2 - return SVector(f1p, f2p, f3p) + return SVector(f1p, f2p, f3p) end @inline function splitting_coirier_vanleer(u, ::Val{:minus}, orientation::Integer, - equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + equations::CompressibleEulerEquations1D) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - P = 2 - mu = 1.0 - nu = 0.75 - omega = 2.0 # adjusted from suggested value of 1.5 + P = 2 + mu = 1.0 + nu = 0.75 + omega = 2.0 # adjusted from suggested value of 1.5 - p_minus = 0.25 * ((M - 1)^2 * (2 + M) + nu * M * (M^2 - 1)^P) * p + p_minus = 0.25 * ((M - 1)^2 * (2 + M) + nu * M * (M^2 - 1)^P) * p - f1m = -0.25 * rho * a * ((M - 1)^2 - mu * (M^2 - 1)^P) - f2m = f1m * v1 + p_minus - f3m = f1m * H + omega * rho * a^3 * M^2 * (M^2 - 1)^2 + f1m = -0.25 * rho * a * ((M - 1)^2 - mu * (M^2 - 1)^P) + f2m = f1m * v1 + p_minus + f3m = f1m * H + omega * rho * a^3 * M^2 * (M^2 - 1)^2 - return SVector(f1m, f2m, f3m) + return SVector(f1m, f2m, f3m) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - rho_ll, rho_v1_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_e_rr = u_rr +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + 
equations::CompressibleEulerEquations1D) + rho_ll, rho_v1_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_e_rr = u_rr - # Calculate primitive variables and speed of sound - v1_ll = rho_v1_ll / rho_ll - v_mag_ll = abs(v1_ll) - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * v_mag_ll^2) - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - v1_rr = rho_v1_rr / rho_rr - v_mag_rr = abs(v1_rr) - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * v_mag_rr^2) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) + # Calculate primitive variables and speed of sound + v1_ll = rho_v1_ll / rho_ll + v_mag_ll = abs(v1_ll) + p_ll = (equations.gamma - 1) * (rho_e_ll - 1 / 2 * rho_ll * v_mag_ll^2) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + v1_rr = rho_v1_rr / rho_rr + v_mag_rr = abs(v1_rr) + p_rr = (equations.gamma - 1) * (rho_e_rr - 1 / 2 * rho_rr * v_mag_rr^2) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) - λ_max = max(v_mag_ll, v_mag_rr) + max(c_ll, c_rr) + λ_max = max(v_mag_ll, v_mag_rr) + max(c_ll, c_rr) end - # Calculate minimum and maximum wave speeds for HLL-type fluxes -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) + λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) - return λ_min, λ_max + return λ_min, λ_max end - """ flux_hllc(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -668,79 +667,80 @@ Computes the HLLC flux (HLL with Contact) for compressible Euler equations devel [Lecture slides](http://www.prague-sum.com/download/2012/Toro_2-HLLC-RiemannSolver.pdf) Signal speeds: [DOI: 10.1137/S1064827593260140](https://doi.org/10.1137/S1064827593260140) """ -function flux_hllc(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Calculate primitive variables and speed of sound - rho_ll, rho_v1_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - e_ll = rho_e_ll / rho_ll - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * v1_ll^2) - c_ll = sqrt(equations.gamma*p_ll/rho_ll) - - v1_rr = rho_v1_rr / rho_rr - e_rr = rho_e_rr / rho_rr - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * v1_rr^2 ) - c_rr = sqrt(equations.gamma*p_rr/rho_rr) - - # Obtain left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr - vel_L = v1_ll - vel_R = v1_rr - vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho - ekin_roe = 0.5 * vel_roe^2 - H_ll = (rho_e_ll + p_ll) / rho_ll - H_rr = (rho_e_rr + p_rr) / rho_rr - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) - - Ssl = min(vel_L - c_ll, vel_roe - c_roe) - Ssr = max(vel_R + c_rr, vel_roe + c_roe) - sMu_L = Ssl - vel_L - sMu_R = Ssr - vel_R - if Ssl >= 0.0 - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - elseif Ssr <= 0.0 - f1 = f_rr[1] - f2 = f_rr[2] - f3 = 
f_rr[3] - else - SStar = (p_rr - p_ll + rho_ll*vel_L*sMu_L - rho_rr*vel_R*sMu_R) / (rho_ll*sMu_L - rho_rr*sMu_R) - if Ssl <= 0.0 <= SStar - densStar = rho_ll*sMu_L / (Ssl-SStar) - enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) - UStar1 = densStar - UStar2 = densStar*SStar - UStar3 = densStar*enerStar - - f1 = f_ll[1]+Ssl*(UStar1 - rho_ll) - f2 = f_ll[2]+Ssl*(UStar2 - rho_v1_ll) - f3 = f_ll[3]+Ssl*(UStar3 - rho_e_ll) +function flux_hllc(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Calculate primitive variables and speed of sound + rho_ll, rho_v1_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + e_ll = rho_e_ll / rho_ll + p_ll = (equations.gamma - 1) * (rho_e_ll - 1 / 2 * rho_ll * v1_ll^2) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + v1_rr = rho_v1_rr / rho_rr + e_rr = rho_e_rr / rho_rr + p_rr = (equations.gamma - 1) * (rho_e_rr - 1 / 2 * rho_rr * v1_rr^2) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Obtain left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr + vel_L = v1_ll + vel_R = v1_rr + vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho + ekin_roe = 0.5 * vel_roe^2 + H_ll = (rho_e_ll + p_ll) / rho_ll + H_rr = (rho_e_rr + p_rr) / rho_rr + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) + + Ssl = min(vel_L - c_ll, vel_roe - c_roe) + Ssr = max(vel_R + c_rr, vel_roe + c_roe) + sMu_L = Ssl - vel_L + sMu_R = Ssr - vel_R + if Ssl >= 0.0 + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + elseif Ssr <= 0.0 + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] else - densStar = rho_rr*sMu_R / (Ssr-SStar) - enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) - UStar1 = densStar - UStar2 = densStar*SStar - UStar3 = densStar*enerStar - - #end - f1 = f_rr[1]+Ssr*(UStar1 - rho_rr) - f2 = f_rr[2]+Ssr*(UStar2 - rho_v1_rr) - f3 = f_rr[3]+Ssr*(UStar3 - rho_e_rr) + SStar = (p_rr - p_ll + rho_ll * vel_L * sMu_L - rho_rr * vel_R * sMu_R) / + (rho_ll * sMu_L - rho_rr * sMu_R) + if Ssl <= 0.0 <= SStar + densStar = rho_ll * sMu_L / (Ssl - SStar) + enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) + UStar1 = densStar + UStar2 = densStar * SStar + UStar3 = densStar * enerStar + + f1 = f_ll[1] + Ssl * (UStar1 - rho_ll) + f2 = f_ll[2] + Ssl * (UStar2 - rho_v1_ll) + f3 = f_ll[3] + Ssl * (UStar3 - rho_e_ll) + else + densStar = rho_rr * sMu_R / (Ssr - SStar) + enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) + UStar1 = densStar + UStar2 = densStar * SStar + UStar3 = densStar * enerStar + + #end + f1 = f_rr[1] + Ssr * (UStar1 - rho_rr) + f2 = f_rr[2] + Ssr * (UStar2 - rho_v1_rr) + f3 = f_rr[3] + Ssr * (UStar3 - rho_e_rr) + end end - end - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - """ flux_hlle(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -759,195 +759,189 @@ Compactly summarized: Numerical methods for conservation laws and related equations. 
[Link](https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf) """ -function flux_hlle(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Calculate primitive variables, enthalpy and speed of sound - rho_ll, v_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v_rr, p_rr = cons2prim(u_rr, equations) - - # `u_ll[3]` is total energy `rho_e_ll` on the left - H_ll = (u_ll[3] + p_ll) / rho_ll - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - - # `u_rr[3]` is total energy `rho_e_rr` on the right - H_rr = (u_rr[3] + p_rr) / rho_rr - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) - - v_roe = (sqrt_rho_ll * v_ll + sqrt_rho_rr * v_rr) * inv_sum_sqrt_rho - v_roe_mag = v_roe^2 - - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) - - # Compute convenience constant for positivity preservation, see - # https://doi.org/10.1016/0021-9991(91)90211-3 - beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) - - # Estimate the edges of the Riemann fan (with positivity conservation) - SsL = min(v_roe - c_roe, v_ll - beta * c_ll, zero(v_roe)) - SsR = max(v_roe + c_roe, v_rr + beta * c_rr, zero(v_roe)) - - if SsL >= 0.0 && SsR > 0.0 - # Positive supersonic speed - f_ll = flux(u_ll, orientation, equations) - - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - elseif SsR <= 0.0 && SsL < 0.0 - # Negative supersonic speed - f_rr = flux(u_rr, orientation, equations) - - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - else - # Subsonic case - # Compute left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / (SsR - SsL) - f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / (SsR - SsL) - f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / (SsR - SsL) - end +function flux_hlle(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Calculate primitive variables, enthalpy and speed of sound + rho_ll, v_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v_rr, p_rr = cons2prim(u_rr, equations) + + # `u_ll[3]` is total energy `rho_e_ll` on the left + H_ll = (u_ll[3] + p_ll) / rho_ll + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + # `u_rr[3]` is total energy `rho_e_rr` on the right + H_rr = (u_rr[3] + p_rr) / rho_rr + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) + + v_roe = (sqrt_rho_ll * v_ll + sqrt_rho_rr * v_rr) * inv_sum_sqrt_rho + v_roe_mag = v_roe^2 + + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) + + # Compute convenience constant for positivity preservation, see + # https://doi.org/10.1016/0021-9991(91)90211-3 + beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) + + # Estimate the edges of the Riemann fan (with positivity conservation) + SsL = min(v_roe - c_roe, v_ll - beta * c_ll, zero(v_roe)) + SsR = max(v_roe + c_roe, v_rr + beta * c_rr, zero(v_roe)) + + if SsL >= 0.0 && SsR > 0.0 + # Positive supersonic speed + f_ll = flux(u_ll, orientation, equations) + + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + elseif SsR <= 0.0 && SsL 
< 0.0 + # Negative supersonic speed + f_rr = flux(u_rr, orientation, equations) + + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + else + # Subsonic case + # Compute left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / + (SsR - SsL) + f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / + (SsR - SsL) + f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / + (SsR - SsL) + end - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - @inline function max_abs_speeds(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 1/2 * rho * v1^2) - c = sqrt(equations.gamma * p / rho) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 1 / 2 * rho * v1^2) + c = sqrt(equations.gamma * p / rho) - return (abs(v1) + c,) + return (abs(v1) + c,) end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u + rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - return SVector(rho, v1, p) + return SVector(rho, v1, p) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u + rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - v_square = v1^2 - p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p + v1 = rho_v1 / rho + v_square = v1^2 + p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = -rho_p + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = -rho_p - return SVector(w1, w2, w3) + return SVector(w1, w2, w3) end @inline function entropy2cons(w, equations::CompressibleEulerEquations1D) - # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - @unpack gamma = equations + # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + @unpack gamma = equations - # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) - # instead of `-rho * s / (gamma - 1)` - V1, V2, V5 = w .* (gamma - 1) + # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) + # instead of `-rho * s / (gamma - 1)` + V1, V2, V5 = w .* (gamma - 1) - # specific entropy, eq. (53) - s = gamma - V1 + 0.5 * (V2^2) / V5 + # specific entropy, eq. (53) + s = gamma - V1 + 0.5 * (V2^2) / V5 - # eq. (52) - energy_internal = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * exp(-s * equations.inv_gamma_minus_one) + # eq. (52) + energy_internal = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * + exp(-s * equations.inv_gamma_minus_one) - # eq. (51) - rho = -V5 * energy_internal - rho_v1 = V2 * energy_internal - rho_e = (1 - 0.5 * (V2^2) / V5) * energy_internal - return SVector(rho, rho_v1, rho_e) + # eq. 
(51) + rho = -V5 * energy_internal + rho_v1 = V2 * energy_internal + rho_e = (1 - 0.5 * (V2^2) / V5) * energy_internal + return SVector(rho, rho_v1, rho_e) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerEquations1D) - rho, v1, p = prim - rho_v1 = rho * v1 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1) - return SVector(rho, rho_v1, rho_e) + rho, v1, p = prim + rho_v1 = rho * v1 + rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1) + return SVector(rho, rho_v1, rho_e) end - @inline function density(u, equations::CompressibleEulerEquations1D) - rho = u[1] - return rho + rho = u[1] + return rho end @inline function pressure(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2) / rho) - return p + rho, rho_v1, rho_e = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2) / rho) + return p end - @inline function density_pressure(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2)) - return rho_times_p + rho, rho_v1, rho_e = u + rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2)) + return rho_times_p end - # Calculate thermodynamic entropy for a conservative state `cons` @inline function entropy_thermodynamic(cons, equations::CompressibleEulerEquations1D) - # Pressure - p = (equations.gamma - 1) * (cons[3] - 1/2 * (cons[2]^2) / cons[1]) + # Pressure + p = (equations.gamma - 1) * (cons[3] - 1 / 2 * (cons[2]^2) / cons[1]) - # Thermodynamic entropy - s = log(p) - equations.gamma*log(cons[1]) + # Thermodynamic entropy + s = log(p) - equations.gamma * log(cons[1]) - return s + return s end - # Calculate mathematical entropy for a conservative state `cons` @inline function entropy_math(cons, equations::CompressibleEulerEquations1D) - # Mathematical entropy - S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one + # Mathematical entropy + S = -entropy_thermodynamic(cons, equations) * cons[1] * + equations.inv_gamma_minus_one - return S + return S end - # Default entropy is the mathematical entropy -@inline entropy(cons, equations::CompressibleEulerEquations1D) = entropy_math(cons, equations) - +@inline function entropy(cons, equations::CompressibleEulerEquations1D) + entropy_math(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline energy_total(cons, ::CompressibleEulerEquations1D) = cons[3] - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(cons, equations::CompressibleEulerEquations1D) - return 0.5 * (cons[2]^2)/cons[1] + return 0.5 * (cons[2]^2) / cons[1] end - # Calculate internal energy for a conservative state `cons` @inline function energy_internal(cons, equations::CompressibleEulerEquations1D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - - end # @muladd diff --git a/src/equations/compressible_euler_2d.jl b/src/equations/compressible_euler_2d.jl index 4877f6891c1..66e3c7bff84 100644 --- a/src/equations/compressible_euler_2d.jl +++ b/src/equations/compressible_euler_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent @doc raw""" CompressibleEulerEquations2D(gamma) @@ -37,21 +37,22 @@ p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho (v_1^2+v_2^2) \right) ``` the pressure. """ -struct CompressibleEulerEquations2D{RealT<:Real} <: AbstractCompressibleEulerEquations{2, 4} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function CompressibleEulerEquations2D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct CompressibleEulerEquations2D{RealT <: Real} <: + AbstractCompressibleEulerEquations{2, 4} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function CompressibleEulerEquations2D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end - -varnames(::typeof(cons2cons), ::CompressibleEulerEquations2D) = ("rho", "rho_v1", "rho_v2", "rho_e") +function varnames(::typeof(cons2cons), ::CompressibleEulerEquations2D) + ("rho", "rho_v1", "rho_v2", "rho_e") +end varnames(::typeof(cons2prim), ::CompressibleEulerEquations2D) = ("rho", "v1", "v2", "p") - # Set initial conditions at physical location `x` for time `t` """ initial_condition_constant(x, t, equations::CompressibleEulerEquations2D) @@ -59,14 +60,13 @@ varnames(::typeof(cons2prim), ::CompressibleEulerEquations2D) = ("rho", "v1", "v A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::CompressibleEulerEquations2D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_e = 10.0 - return SVector(rho, rho_v1, rho_v2, rho_e) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_e = 10.0 + return SVector(rho, rho_v1, rho_v2, rho_e) end - """ initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations2D) @@ -74,20 +74,21 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations2D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - ini = c + A * sin(ω * (x[1] + x[2] - t)) - - rho = ini - rho_v1 = ini - rho_v2 = ini - rho_e = ini^2 - - return SVector(rho, rho_v1, rho_v2, rho_e) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerEquations2D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + ini = c + A * sin(ω * (x[1] + x[2] - t)) + + rho = ini + rho_v1 = ini + rho_v2 = ini + rho_e = ini^2 + + return SVector(rho, rho_v1, rho_v2, rho_e) end """ @@ -97,32 +98,32 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerEquations2D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - γ = equations.gamma - - x1, x2 = x - si, co = sincos(ω * (x1 + x2 - t)) - rho = c + A * si - rho_x = ω * A * co - # Note that d/dt rho = -d/dx rho = -d/dy rho. 
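# As a quick sanity check of the values below (using only the ansatz above,
# where rho_v1 = rho_v2 = rho implies v1 = v2 = 1): the continuity equation
# gives du1 = rho_t + (rho * v1)_x + (rho * v2)_y = -rho_x + rho_x + rho_x
# = rho_x, matching the assignment below; du2, du3, and du4 follow in the
# same way from the momentum and energy equations.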
- - tmp = (2 * rho - 1) * (γ - 1) - - du1 = rho_x - du2 = rho_x * (1 + tmp) - du3 = du2 - du4 = 2 * rho_x * (rho + tmp) - - return SVector(du1, du2, du3, du4) +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerEquations2D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + γ = equations.gamma + + x1, x2 = x + si, co = sincos(ω * (x1 + x2 - t)) + rho = c + A * si + rho_x = ω * A * co + # Note that d/dt rho = -d/dx rho = -d/dy rho. + + tmp = (2 * rho - 1) * (γ - 1) + + du1 = rho_x + du2 = rho_x * (1 + tmp) + du3 = du2 + du4 = 2 * rho_x * (rho + tmp) + + return SVector(du1, du2, du3, du4) end - """ initial_condition_density_wave(x, t, equations::CompressibleEulerEquations2D) @@ -138,17 +139,16 @@ with the following parameters - polydeg = 5 """ function initial_condition_density_wave(x, t, equations::CompressibleEulerEquations2D) - v1 = 0.1 - v2 = 0.2 - rho = 1 + 0.98 * sinpi(2 * (x[1] + x[2] - t * (v1 + v2))) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - p = 20 - rho_e = p / (equations.gamma - 1) + 1/2 * rho * (v1^2 + v2^2) - return SVector(rho, rho_v1, rho_v2, rho_e) + v1 = 0.1 + v2 = 0.2 + rho = 1 + 0.98 * sinpi(2 * (x[1] + x[2] - t * (v1 + v2))) + rho_v1 = rho * v1 + rho_v2 = rho * v2 + p = 20 + rho_e = p / (equations.gamma - 1) + 1 / 2 * rho * (v1^2 + v2^2) + return SVector(rho, rho_v1, rho_v2, rho_e) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations2D) @@ -157,26 +157,26 @@ A weak blast wave taken from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations2D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Set up polar coordinates - inicenter = SVector(0.0, 0.0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - sin_phi, cos_phi = sincos(phi) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, p), equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerEquations2D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Set up polar coordinates + inicenter = SVector(0.0, 0.0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + sin_phi, cos_phi = sincos(phi) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, p), equations) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations2D) @@ -187,22 +187,23 @@ Setup used for convergence tests of the Euler equations with self-gravity used i in combination with [`source_terms_eoc_test_coupled_euler_gravity`](@ref) or [`source_terms_eoc_test_euler`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations2D) - # OBS! 
this assumes that γ = 2 other manufactured source terms are incorrect - if equations.gamma != 2.0 - error("adiabatic constant must be 2 for the coupling convergence test") - end - c = 2.0 - A = 0.1 - ini = c + A * sin(pi * (x[1] + x[2] - t)) - G = 1.0 # gravitational constant - - rho = ini - v1 = 1.0 - v2 = 1.0 - p = ini^2 * G / pi # * 2 / ndims, but ndims==2 here - - return prim2cons(SVector(rho, v1, v2, p), equations) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::CompressibleEulerEquations2D) + # OBS! this assumes that γ = 2 other manufactured source terms are incorrect + if equations.gamma != 2.0 + error("adiabatic constant must be 2 for the coupling convergence test") + end + c = 2.0 + A = 0.1 + ini = c + A * sin(pi * (x[1] + x[2] - t)) + G = 1.0 # gravitational constant + + rho = ini + v1 = 1.0 + v2 = 1.0 + p = ini^2 * G / pi # * 2 / ndims, but ndims==2 here + + return prim2cons(SVector(rho, v1, v2, p), equations) end """ @@ -214,24 +215,25 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). """ -@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, equations::CompressibleEulerEquations2D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 # gravitational constant, must match coupling solver - C_grav = -2 * G / pi # 2 == 4 / ndims - - x1, x2 = x - si, co = sincos(pi * (x1 + x2 - t)) - rhox = A * pi * co - rho = c + A * si - - du1 = rhox - du2 = rhox - du3 = rhox - du4 = (1.0 - C_grav*rho)*rhox - - return SVector(du1, du2, du3, du4) +@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, + equations::CompressibleEulerEquations2D) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 # gravitational constant, must match coupling solver + C_grav = -2 * G / pi # 2 == 4 / ndims + + x1, x2 = x + si, co = sincos(pi * (x1 + x2 - t)) + rhox = A * pi * co + rho = c + A * si + + du1 = rhox + du2 = rhox + du3 = rhox + du4 = (1.0 - C_grav * rho) * rhox + + return SVector(du1, du2, du3, du4) end """ @@ -243,27 +245,27 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). 
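As a usage sketch (the mesh extent, refinement level, and solver settings below are illustrative assumptions, not part of the test definition), these source terms are passed to a semidiscretization via the `source_terms` keyword:

```julia
using Trixi

equations = CompressibleEulerEquations2D(2.0)  # gamma = 2 is required here

mesh = TreeMesh((0.0, 0.0), (2.0, 2.0),
                initial_refinement_level = 4, n_cells_max = 10_000)
solver = DGSEM(polydeg = 3, surface_flux = flux_hll)

semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_eoc_test_coupled_euler_gravity,
                                    solver, source_terms = source_terms_eoc_test_euler)
```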
""" -@inline function source_terms_eoc_test_euler(u, x, t, equations::CompressibleEulerEquations2D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 - C_grav = -2 * G / pi # 2 == 4 / ndims - - x1, x2 = x - si, co = sincos(pi * (x1 + x2 - t)) - rhox = A * pi * co - rho = c + A * si - - du1 = rhox - du2 = rhox * (1 - C_grav * rho) - du3 = rhox * (1 - C_grav * rho) - du4 = rhox * (1 - 3 * C_grav * rho) - - return SVector(du1, du2, du3, du4) +@inline function source_terms_eoc_test_euler(u, x, t, + equations::CompressibleEulerEquations2D) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 + C_grav = -2 * G / pi # 2 == 4 / ndims + + x1, x2 = x + si, co = sincos(pi * (x1 + x2 - t)) + rhox = A * pi * co + rho = c + A * si + + du1 = rhox + du2 = rhox * (1 - C_grav * rho) + du3 = rhox * (1 - C_grav * rho) + du4 = rhox * (1 - 3 * C_grav * rho) + + return SVector(du1, du2, du3, du4) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::CompressibleEulerEquations2D) @@ -290,36 +292,40 @@ Should be used together with [`UnstructuredMesh2D`](@ref). x, t, surface_flux_function, equations::CompressibleEulerEquations2D) + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal = normal_direction / norm_ + + # rotate the internal solution state + u_local = rotate_to_x(u_inner, normal, equations) + + # compute the primitive variables + rho_local, v_normal, v_tangent, p_local = cons2prim(u_local, equations) + + # Get the solution of the pressure Riemann problem + # See Section 6.3.3 of + # Eleuterio F. Toro (2009) + # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction + # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) + if v_normal <= 0.0 + sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed + p_star = p_local * + (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * + equations.gamma * + equations.inv_gamma_minus_one) + else # v_normal > 0.0 + A = 2 / ((equations.gamma + 1) * rho_local) + B = p_local * (equations.gamma - 1) / (equations.gamma + 1) + p_star = p_local + + 0.5 * v_normal / A * + (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) + end - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal = normal_direction / norm_ - - # rotate the internal solution state - u_local = rotate_to_x(u_inner, normal, equations) - - # compute the primitive variables - rho_local, v_normal, v_tangent, p_local = cons2prim(u_local, equations) - - # Get the solution of the pressure Riemann problem - # See Section 6.3.3 of - # Eleuterio F. 
Toro (2009) - # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction - # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) - if v_normal <= 0.0 - sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed - p_star = p_local * (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * equations.gamma * equations.inv_gamma_minus_one) - else # v_normal > 0.0 - A = 2 / ((equations.gamma + 1) * rho_local) - B = p_local * (equations.gamma - 1) / (equations.gamma + 1) - p_star = p_local + 0.5 * v_normal / A * (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) - end - - # For the slip wall we directly set the flux as the normal velocity is zero - return SVector(zero(eltype(u_inner)), - p_star * normal[1], - p_star * normal[2], - zero(eltype(u_inner))) * norm_ + # For the slip wall we directly set the flux as the normal velocity is zero + return SVector(zero(eltype(u_inner)), + p_star * normal[1], + p_star * normal[2], + zero(eltype(u_inner))) * norm_ end """ @@ -332,16 +338,16 @@ Should be used together with [`TreeMesh`](@ref). direction, x, t, surface_flux_function, equations::CompressibleEulerEquations2D) - # get the appropriate normal vector from the orientation - if orientation == 1 - normal_direction = SVector(1, 0) - else # orientation == 2 - normal_direction = SVector(0, 1) - end - - # compute and return the flux using `boundary_condition_slip_wall` routine above - return boundary_condition_slip_wall(u_inner, normal_direction, direction, - x, t, surface_flux_function, equations) + # get the appropriate normal vector from the orientation + if orientation == 1 + normal_direction = SVector(1, 0) + else # orientation == 2 + normal_direction = SVector(0, 1) + end + + # compute and return the flux using `boundary_condition_slip_wall` routine above + return boundary_condition_slip_wall(u_inner, normal_direction, direction, + x, t, surface_flux_function, equations) end """ @@ -354,56 +360,57 @@ Should be used together with [`StructuredMesh`](@ref). 
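For example, a fully reflecting box can be realized by using this condition on all four sides (a sketch; `mesh`, `solver`, and the remaining setup are assumed to be defined elsewhere):

```julia
boundary_conditions = (x_neg = boundary_condition_slip_wall,
                       x_pos = boundary_condition_slip_wall,
                       y_neg = boundary_condition_slip_wall,
                       y_pos = boundary_condition_slip_wall)
# passed as `SemidiscretizationHyperbolic(mesh, equations, initial_condition,
#            solver; boundary_conditions)`
```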
direction, x, t, surface_flux_function, equations::CompressibleEulerEquations2D) - # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back - # to be inward pointing on the -x and -y sides due to the orientation convention used by StructuredMesh - if isodd(direction) - boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, - x, t, surface_flux_function, equations) - else - boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, - x, t, surface_flux_function, equations) - end - - return boundary_flux -end + # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back + # to be inward pointing on the -x and -y sides due to the orientation convention used by StructuredMesh + if isodd(direction) + boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, + x, t, surface_flux_function, + equations) + else + boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, + x, t, surface_flux_function, + equations) + end + return boundary_flux +end # Calculate 2D flux for a single point @inline function flux(u, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1 * v1 + p - f3 = rho_v1 * v2 - f4 = (rho_e + p) * v1 - else - f1 = rho_v2 - f2 = rho_v2 * v1 - f3 = rho_v2 * v2 + p - f4 = (rho_e + p) * v2 - end - return SVector(f1, f2, f3, f4) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + f3 = rho_v1 * v2 + f4 = (rho_e + p) * v1 + else + f1 = rho_v2 + f2 = rho_v2 * v1 + f3 = rho_v2 * v2 + p + f4 = (rho_e + p) * v2 + end + return SVector(f1, f2, f3, f4) end # Calculate 2D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - rho_e = last(u) - rho, v1, v2, p = cons2prim(u, equations) - - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] - rho_v_normal = rho * v_normal - f1 = rho_v_normal - f2 = rho_v_normal * v1 + p * normal_direction[1] - f3 = rho_v_normal * v2 + p * normal_direction[2] - f4 = (rho_e + p) * v_normal - return SVector(f1, f2, f3, f4) +@inline function flux(u, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + rho_e = last(u) + rho, v1, v2, p = cons2prim(u, equations) + + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + rho_v_normal = rho * v_normal + f1 = rho_v_normal + f2 = rho_v_normal * v1 + p * normal_direction[1] + f3 = rho_v_normal * v2 + p * normal_direction[2] + f4 = (rho_e + p) * v_normal + return SVector(f1, f2, f3, f4) end - """ flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerEquations2D) @@ -420,61 +427,63 @@ The modification is in the energy flux to guarantee pressure equilibrium and was compressible flows [DOI: 10.1016/j.jcp.2020.110060](https://doi.org/10.1016/j.jcp.2020.110060) """ -@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # 
Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - p_avg = 1/2 * ( p_ll + p_rr) - kin_avg = 1/2 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - pv1_avg = 1/2 * (p_ll*v1_rr + p_rr*v1_ll) - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = p_avg*v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - else - pv2_avg = 1/2 * (p_ll*v2_rr + p_rr*v2_ll) - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + kin_avg = 1 / 2 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + pv1_avg = 1 / 2 * (p_ll * v1_rr + p_rr * v1_ll) + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + else + pv2_avg = 1 / 2 * (p_ll * v2_rr + p_rr * v2_ll) + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + end -@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - v_dot_n_avg = 1/2 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 1/2 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end +@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + v_dot_n_avg = 1 / 2 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on 
normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4) +end """ flux_kennedy_gruber(u_ll, u_rr, orientation_or_normal_direction, @@ -486,60 +495,61 @@ Kinetic energy preserving two-point flux by Navier-Stokes equations for a compressible fluid [DOI: 10.1016/j.jcp.2007.09.020](https://doi.org/10.1016/j.jcp.2007.09.020) """ -@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - p_avg = 1/2 * ( p_ll + p_rr) - e_avg = 1/2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_avg * v1_avg - f2 = rho_avg * v1_avg * v1_avg + p_avg - f3 = rho_avg * v1_avg * v2_avg - f4 = (rho_avg * e_avg + p_avg) * v1_avg - else - f1 = rho_avg * v2_avg - f2 = rho_avg * v2_avg * v1_avg - f3 = rho_avg * v2_avg * v2_avg + p_avg - f4 = (rho_avg * e_avg + p_avg) * v2_avg - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + e_avg = 1 / 2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_avg * v1_avg + f2 = rho_avg * v1_avg * v1_avg + p_avg + f3 = rho_avg * v1_avg * v2_avg + f4 = (rho_avg * e_avg + p_avg) * v1_avg + else + f1 = rho_avg * v2_avg + f2 = rho_avg * v2_avg * v1_avg + f3 = rho_avg * v2_avg * v2_avg + p_avg + f4 = (rho_avg * e_avg + p_avg) * v2_avg + end -@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] - p_avg = 0.5 * (p_ll + p_rr) - e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * e_avg + p_avg * v_dot_n_avg - - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end +@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_e_ll = 
last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] + p_avg = 0.5 * (p_ll + p_rr) + e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * e_avg + p_avg * v_dot_n_avg + + return SVector(f1, f2, f3, f4) +end """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerEquations2D) @@ -550,41 +560,43 @@ Entropy conserving two-point flux by for Compressible Euler and Navier-Stokes Equations [DOI: 10.4208/cicp.170712.010313a](https://doi.org/10.4208/cicp.170712.010313a) """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2) - specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2) - - # Compute the necessary mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_mean = 0.5 * rho_avg / beta_avg - velocity_square_avg = specific_kin_ll + specific_kin_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_mean - f3 = f1 * v2_avg - f4 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+f2*v1_avg + f3*v2_avg - else - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_mean - f4 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+f2*v1_avg + f3*v2_avg - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2) + specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2) + + # Compute the necessary mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_mean = 0.5 * rho_avg / beta_avg + velocity_square_avg = specific_kin_ll + specific_kin_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + f3 = f1 * v2_avg + f4 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + else + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_mean + f4 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + end + return SVector(f1, f2, f3, f4) +end """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, @@ -601,68 
+613,74 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - else - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + else + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + end -@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg 
= 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end +@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4) +end """ splitting_steger_warming(u, orientation::Integer, @@ -690,104 +708,107 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
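Both call styles can be combined, for example (a minimal sketch; the state is an arbitrary admissible value):

```julia
using Trixi

equations = CompressibleEulerEquations2D(1.4)
u = prim2cons(SVector(1.0, 0.1, -0.2, 10.0), equations)

fm, fp = splitting_steger_warming(u, 1, equations)  # both parts in x-direction
fp_only = splitting_steger_warming(u, Val{:plus}(), 1, equations)
fp == fp_only  # true
```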
""" @inline function splitting_steger_warming(u, orientation::Integer, equations::CompressibleEulerEquations2D) - fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) - fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) + fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_steger_warming(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) - f3p = rho_2gamma * alpha_p * v2 - f4p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - else # orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * alpha_p * v1 - f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) - f4p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - end - return SVector(f1p, f2p, f3p, f4p) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + a = sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) + f3p = rho_2gamma * alpha_p * v2 + f4p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + else # orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * alpha_p * v1 + f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) + f4p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * 
equations.inv_gamma_minus_one) + end + return SVector(f1p, f2p, f3p, f4p) end @inline function splitting_steger_warming(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) - f3m = rho_2gamma * alpha_m * v2 - f4m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - else # orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * alpha_m * v1 - f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m-lambda3_m)) - f4m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - end - return SVector(f1m, f2m, f3m, f4m) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + a = sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) + f3m = rho_2gamma * alpha_m * v2 + f4m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + else # orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * alpha_m * v1 + f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m - lambda3_m)) + f4m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + end + return SVector(f1m, f2m, f3m, f4m) end - """ splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations2D) @@ -824,72 +845,71 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
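By construction the two parts recover the full physical flux when summed, which can be checked numerically (a sketch with an arbitrary admissible state):

```julia
using Trixi

equations = CompressibleEulerEquations2D(1.4)
u = prim2cons(SVector(1.0, 0.3, -0.1, 2.0), equations)

fm, fp = splitting_vanleer_haenel(u, 2, equations)  # y-direction
isapprox(fm + fp, flux(u, 2, equations))            # true up to roundoff
```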
""" @inline function splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations2D) - fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) - fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) + fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_vanleer_haenel(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - - if orientation == 1 - M = v1 / a - p_plus = 0.5 * (1 + equations.gamma * M) * p - - f1p = 0.25 * rho * a * (M + 1)^2 - f2p = f1p * v1 + p_plus - f3p = f1p * v2 - f4p = f1p * H - else # orientation == 2 - M = v2 / a - p_plus = 0.5 * (1 + equations.gamma * M) * p - - f1p = 0.25 * rho * a * (M + 1)^2 - f2p = f1p * v1 - f3p = f1p * v2 + p_plus - f4p = f1p * H - end - return SVector(f1p, f2p, f3p, f4p) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + + if orientation == 1 + M = v1 / a + p_plus = 0.5 * (1 + equations.gamma * M) * p + + f1p = 0.25 * rho * a * (M + 1)^2 + f2p = f1p * v1 + p_plus + f3p = f1p * v2 + f4p = f1p * H + else # orientation == 2 + M = v2 / a + p_plus = 0.5 * (1 + equations.gamma * M) * p + + f1p = 0.25 * rho * a * (M + 1)^2 + f2p = f1p * v1 + f3p = f1p * v2 + p_plus + f4p = f1p * H + end + return SVector(f1p, f2p, f3p, f4p) end @inline function splitting_vanleer_haenel(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - - if orientation == 1 - M = v1 / a - p_minus = 0.5 * (1 - equations.gamma * M) * p - - f1m= -0.25 * rho * a * (M - 1)^2 - f2m = f1m * v1 + p_minus - f3m = f1m * v2 - f4m = f1m * H - else # orientation == 2 - M = v2 / a - p_minus = 0.5 * (1 - equations.gamma * M) * p - - f1m= -0.25 * rho * a * (M - 1)^2 - f2m = f1m * v1 - f3m = f1m * v2 + p_minus - f4m = f1m * H - end - return SVector(f1m, f2m, f3m, f4m) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + + if orientation == 1 + M = v1 / a + p_minus = 0.5 * (1 - equations.gamma * M) * p + + f1m = -0.25 * rho * a * (M - 1)^2 + f2m = f1m * v1 + p_minus + f3m = f1m * v2 + f4m = f1m * H + else # orientation == 2 + M = v2 / a + p_minus = 0.5 * (1 - equations.gamma * M) * p + + f1m = -0.25 * rho * a * (M - 1)^2 + f2m = f1m * v1 + f3m = f1m * v2 + p_minus + f4m = f1m * H + end + return SVector(f1m, f2m, f3m, f4m) end - """ splitting_lax_friedrichs(u, orientation::Integer, equations::CompressibleEulerEquations2D) @@ -911,184 +931,183 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
""" @inline function splitting_lax_friedrichs(u, orientation::Integer, equations::CompressibleEulerEquations2D) - fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) - fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) + fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_lax_friedrichs(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) - - if orientation == 1 - #lambda = 0.5 * (abs(v1) + a) - f1p = 0.5 * rho * v1 + lambda * u[1] - f2p = 0.5 * rho * v1 * v1 + 0.5 * p + lambda * u[2] - f3p = 0.5 * rho * v1 * v2 + lambda * u[3] - f4p = 0.5 * rho * v1 * H + lambda * u[4] - else # orientation == 2 - #lambda = 0.5 * (abs(v2) + a) - f1p = 0.5 * rho * v2 + lambda * u[1] - f2p = 0.5 * rho * v2 * v1 + lambda * u[2] - f3p = 0.5 * rho * v2 * v2 + 0.5 * p + lambda * u[3] - f4p = 0.5 * rho * v2 * H + lambda * u[4] - end - return SVector(f1p, f2p, f3p, f4p) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) + + if orientation == 1 + #lambda = 0.5 * (abs(v1) + a) + f1p = 0.5 * rho * v1 + lambda * u[1] + f2p = 0.5 * rho * v1 * v1 + 0.5 * p + lambda * u[2] + f3p = 0.5 * rho * v1 * v2 + lambda * u[3] + f4p = 0.5 * rho * v1 * H + lambda * u[4] + else # orientation == 2 + #lambda = 0.5 * (abs(v2) + a) + f1p = 0.5 * rho * v2 + lambda * u[1] + f2p = 0.5 * rho * v2 * v1 + lambda * u[2] + f3p = 0.5 * rho * v2 * v2 + 0.5 * p + lambda * u[3] + f4p = 0.5 * rho * v2 * H + lambda * u[4] + end + return SVector(f1p, f2p, f3p, f4p) end @inline function splitting_lax_friedrichs(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) - - if orientation == 1 - #lambda = 0.5 * (abs(v1) + a) - f1m = 0.5 * rho * v1 - lambda * u[1] - f2m = 0.5 * rho * v1 * v1 + 0.5 * p - lambda * u[2] - f3m = 0.5 * rho * v1 * v2 - lambda * u[3] - f4m = 0.5 * rho * v1 * H - lambda * u[4] - else # orientation == 2 - #lambda = 0.5 * (abs(v2) + a) - f1m = 0.5 * rho * v2 - lambda * u[1] - f2m = 0.5 * rho * v2 * v1 - lambda * u[2] - f3m = 0.5 * rho * v2 * v2 + 0.5 * p - lambda * u[3] - f4m = 0.5 * rho * v2 * H - lambda * u[4] - end - return SVector(f1m, f2m, f3m, f4m) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) + + if orientation == 1 + #lambda = 0.5 * (abs(v1) + a) + f1m = 0.5 * rho * v1 - lambda * u[1] + f2m = 0.5 * rho * v1 * v1 + 0.5 * p - lambda * u[2] + f3m = 0.5 * rho * v1 * v2 - lambda * u[3] + f4m = 0.5 * rho * v1 * H - lambda * u[4] + else # orientation == 2 + #lambda = 0.5 * (abs(v2) + a) + f1m = 
0.5 * rho * v2 - lambda * u[1] + f2m = 0.5 * rho * v2 * v1 - lambda * u[2] + f3m = 0.5 * rho * v2 * v2 + 0.5 * p - lambda * u[3] + f4m = 0.5 * rho * v2 * H - lambda * u[4] + end + return SVector(f1m, f2m, f3m, f4m) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Get the velocity value in the appropriate direction - if orientation == 1 - v_ll = v1_ll - v_rr = v1_rr - else # orientation == 2 - v_ll = v2_ll - v_rr = v2_rr - end - # Calculate sound speeds - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) -end - - -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Calculate normal velocities and sound speed - # left - v_ll = ( v1_ll * normal_direction[1] - + v2_ll * normal_direction[2] ) - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - # right - v_rr = ( v1_rr * normal_direction[1] - + v2_rr * normal_direction[2] ) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Get the velocity value in the appropriate direction + if orientation == 1 + v_ll = v1_ll + v_rr = v1_rr + else # orientation == 2 + v_ll = v2_ll + v_rr = v2_rr + end + # Calculate sound speeds + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) end +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Calculate normal velocities and sound speed + # left + v_ll = (v1_ll * normal_direction[1] + + + v2_ll * normal_direction[2]) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + # right + v_rr = (v1_rr * normal_direction[1] + + + v2_rr * normal_direction[2]) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) +end # Calculate minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - if orientation == 1 # x-direction - λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) - else # y-direction - λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) - end - - return λ_min, λ_max + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + if orientation == 1 # x-direction 
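        # Editor's note (hedged): these "naive" estimates take the minimal wave
        # speed from the left state and the maximal one from the right state,
        #   λ_min = v_ll - c_ll,  λ_max = v_rr + c_rr,
        # evaluated in the coordinate direction selected by `orientation`.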
+ λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) + else # y-direction + λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - norm_ = norm(normal_direction) - # The v_normals are already scaled by the norm - λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ - λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ + norm_ = norm(normal_direction) + # The v_normals are already scaled by the norm + λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ + λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ - return λ_min, λ_max + return λ_min, λ_max end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this rotation of the state vector @inline function rotate_to_x(u, normal_vector, equations::CompressibleEulerEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). - c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0; - # 0 n_1 n_2 0; - # 0 t_1 t_2 0; - # 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1 - - return SVector(u[1], - c * u[2] + s * u[3], - -s * u[2] + c * u[3], - u[4]) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). + c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0; + # 0 n_1 n_2 0; + # 0 t_1 t_2 0; + # 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1 + + return SVector(u[1], + c * u[2] + s * u[3], + -s * u[2] + c * u[3], + u[4]) end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this back-rotation of the state vector -@inline function rotate_from_x(u, normal_vector, equations::CompressibleEulerEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). 
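# Editor's sketch (assumes Trixi.jl and StaticArrays are loaded): `rotate_to_x`
# and `rotate_from_x` apply inverse rotations for a normalized normal vector,
# so a round trip must reproduce the input state up to roundoff. Qualified
# names are used since these helpers may not be exported.
equations = CompressibleEulerEquations2D(1.4)
u = prim2cons(SVector(1.0, 0.2, -0.4, 1.5), equations)
n = SVector(cos(0.3), sin(0.3))  # unit normal
Trixi.rotate_from_x(Trixi.rotate_to_x(u, n, equations), n, equations) ≈ u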
- c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D back-rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0; - # 0 n_1 t_1 0; - # 0 n_2 t_2 0; - # 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1 - - return SVector(u[1], - c * u[2] - s * u[3], - s * u[2] + c * u[3], - u[4]) +@inline function rotate_from_x(u, normal_vector, + equations::CompressibleEulerEquations2D) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). + c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D back-rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0; + # 0 n_1 t_1 0; + # 0 n_2 t_2 0; + # 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1 + + return SVector(u[1], + c * u[2] - s * u[3], + s * u[2] + c * u[3], + u[4]) end - """ flux_hllc(u_ll, u_rr, orientation, equations::CompressibleEulerEquations2D) @@ -1096,101 +1115,102 @@ Computes the HLLC flux (HLL with Contact) for compressible Euler equations devel [Lecture slides](http://www.prague-sum.com/download/2012/Toro_2-HLLC-RiemannSolver.pdf) Signal speeds: [DOI: 10.1137/S1064827593260140](https://doi.org/10.1137/S1064827593260140) """ -function flux_hllc(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Calculate primitive variables and speed of sound - rho_ll, rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - e_ll = rho_e_ll / rho_ll - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * (v1_ll^2 + v2_ll^2)) - c_ll = sqrt(equations.gamma*p_ll/rho_ll) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - e_rr = rho_e_rr / rho_rr - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * (v1_rr^2 + v2_rr^2)) - c_rr = sqrt(equations.gamma*p_rr/rho_rr) - - # Obtain left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr - if orientation == 1 # x-direction - vel_L = v1_ll - vel_R = v1_rr - ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 - elseif orientation == 2 # y-direction - vel_L = v2_ll - vel_R = v2_rr - ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 - end - vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho - ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) - H_ll = (rho_e_ll + p_ll) / rho_ll - H_rr = (rho_e_rr + p_rr) / rho_rr - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) - Ssl = min(vel_L - c_ll, vel_roe - c_roe) - Ssr = max(vel_R + c_rr, vel_roe + c_roe) - sMu_L = Ssl - vel_L - sMu_R = Ssr - vel_R - - if Ssl >= 0.0 - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - elseif Ssr <= 0.0 - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - else - SStar = (p_rr - p_ll + rho_ll*vel_L*sMu_L - rho_rr*vel_R*sMu_R) / (rho_ll*sMu_L - rho_rr*sMu_R) - if Ssl <= 0.0 <= SStar - densStar = rho_ll*sMu_L / (Ssl-SStar) - enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) - UStar1 = densStar - UStar4 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_ll - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_ll - UStar3 = densStar*SStar - end - f1 = f_ll[1]+Ssl*(UStar1 
- rho_ll) - f2 = f_ll[2]+Ssl*(UStar2 - rho_v1_ll) - f3 = f_ll[3]+Ssl*(UStar3 - rho_v2_ll) - f4 = f_ll[4]+Ssl*(UStar4 - rho_e_ll) +function flux_hllc(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Calculate primitive variables and speed of sound + rho_ll, rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + e_ll = rho_e_ll / rho_ll + p_ll = (equations.gamma - 1) * (rho_e_ll - 1 / 2 * rho_ll * (v1_ll^2 + v2_ll^2)) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + e_rr = rho_e_rr / rho_rr + p_rr = (equations.gamma - 1) * (rho_e_rr - 1 / 2 * rho_rr * (v1_rr^2 + v2_rr^2)) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Obtain left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr + if orientation == 1 # x-direction + vel_L = v1_ll + vel_R = v1_rr + ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + elseif orientation == 2 # y-direction + vel_L = v2_ll + vel_R = v2_rr + ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + end + vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho + ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) + H_ll = (rho_e_ll + p_ll) / rho_ll + H_rr = (rho_e_rr + p_rr) / rho_rr + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) + Ssl = min(vel_L - c_ll, vel_roe - c_roe) + Ssr = max(vel_R + c_rr, vel_roe + c_roe) + sMu_L = Ssl - vel_L + sMu_R = Ssr - vel_R + + if Ssl >= 0.0 + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + elseif Ssr <= 0.0 + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] else - densStar = rho_rr*sMu_R / (Ssr-SStar) - enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) - UStar1 = densStar - UStar4 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_rr - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_rr - UStar3 = densStar*SStar - end - f1 = f_rr[1]+Ssr*(UStar1 - rho_rr) - f2 = f_rr[2]+Ssr*(UStar2 - rho_v1_rr) - f3 = f_rr[3]+Ssr*(UStar3 - rho_v2_rr) - f4 = f_rr[4]+Ssr*(UStar4 - rho_e_rr) + SStar = (p_rr - p_ll + rho_ll * vel_L * sMu_L - rho_rr * vel_R * sMu_R) / + (rho_ll * sMu_L - rho_rr * sMu_R) + if Ssl <= 0.0 <= SStar + densStar = rho_ll * sMu_L / (Ssl - SStar) + enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) + UStar1 = densStar + UStar4 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_ll + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_ll + UStar3 = densStar * SStar + end + f1 = f_ll[1] + Ssl * (UStar1 - rho_ll) + f2 = f_ll[2] + Ssl * (UStar2 - rho_v1_ll) + f3 = f_ll[3] + Ssl * (UStar3 - rho_v2_ll) + f4 = f_ll[4] + Ssl * (UStar4 - rho_e_ll) + else + densStar = rho_rr * sMu_R / (Ssr - SStar) + enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) + UStar1 = densStar + UStar4 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_rr + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_rr + UStar3 = densStar * SStar + end + f1 = f_rr[1] + Ssr * (UStar1 - rho_rr) + f2 = f_rr[2] + Ssr * 
(UStar2 - rho_v1_rr) + f3 = f_rr[3] + Ssr * (UStar3 - rho_v2_rr) + f4 = f_rr[4] + Ssr * (UStar4 - rho_e_rr) + end end - end - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end - """ flux_hlle(u_ll, u_rr, orientation, equations::CompressibleEulerEquations2D) @@ -1206,171 +1226,167 @@ of the numerical flux. On Godunov-type methods near low densities. [DOI: 10.1016/0021-9991(91)90211-3](https://doi.org/10.1016/0021-9991(91)90211-3) """ -function flux_hlle(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Calculate primitive variables, enthalpy and speed of sound - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # `u_ll[4]` is total energy `rho_e_ll` on the left - H_ll = (u_ll[4] + p_ll) / rho_ll - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - - # `u_rr[4]` is total energy `rho_e_rr` on the right - H_rr = (u_rr[4] + p_rr) / rho_rr - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) - - v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho - v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho - v_roe_mag = v1_roe^2 + v2_roe^2 - - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) - - # Compute convenience constant for positivity preservation, see - # https://doi.org/10.1016/0021-9991(91)90211-3 - beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) - - # Estimate the edges of the Riemann fan (with positivity conservation) - if orientation == 1 # x-direction - SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) - SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) - elseif orientation == 2 # y-direction - SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) - SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) - end - - if SsL >= 0.0 && SsR > 0.0 - # Positive supersonic speed - f_ll = flux(u_ll, orientation, equations) - - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - elseif SsR <= 0.0 && SsL < 0.0 - # Negative supersonic speed - f_rr = flux(u_rr, orientation, equations) - - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - else - # Subsonic case - # Compute left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) +function flux_hlle(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Calculate primitive variables, enthalpy and speed of sound + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # `u_ll[4]` is total energy `rho_e_ll` on the left + H_ll = (u_ll[4] + p_ll) / rho_ll + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + # `u_rr[4]` is total energy `rho_e_rr` on the right + H_rr = (u_rr[4] + p_rr) / rho_rr + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) + + v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho + v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho + v_roe_mag = v1_roe^2 + v2_roe^2 + + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) + + # 
Compute convenience constant for positivity preservation, see + # https://doi.org/10.1016/0021-9991(91)90211-3 + beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) + + # Estimate the edges of the Riemann fan (with positivity conservation) + if orientation == 1 # x-direction + SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) + SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) + elseif orientation == 2 # y-direction + SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) + SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) + end - f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / (SsR - SsL) - f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / (SsR - SsL) - f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / (SsR - SsL) - f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / (SsR - SsL) - end + if SsL >= 0.0 && SsR > 0.0 + # Positive supersonic speed + f_ll = flux(u_ll, orientation, equations) + + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + elseif SsR <= 0.0 && SsL < 0.0 + # Negative supersonic speed + f_rr = flux(u_rr, orientation, equations) + + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] + else + # Subsonic case + # Compute left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / + (SsR - SsL) + f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / + (SsR - SsL) + f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / + (SsR - SsL) + f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / + (SsR - SsL) + end - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end - @inline function max_abs_speeds(u, equations::CompressibleEulerEquations2D) - rho, v1, v2, p = cons2prim(u, equations) - c = sqrt(equations.gamma * p / rho) + rho, v1, v2, p = cons2prim(u, equations) + c = sqrt(equations.gamma * p / rho) - return abs(v1) + c, abs(v2) + c + return abs(v1) + c, abs(v2) + c end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u + rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - return SVector(rho, v1, v2, p) + return SVector(rho, v1, v2, p) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u + rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v_square = v1^2 + v2^2 - p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v_square = v1^2 + v2^2 + p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = -rho_p + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = -rho_p - return SVector(w1, w2, w3, w4) + return SVector(w1, w2, w3, w4) end @inline 
function entropy2cons(w, equations::CompressibleEulerEquations2D) - # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - @unpack gamma = equations - - # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) - # instead of `-rho * s / (gamma - 1)` - V1, V2, V3, V5 = w .* (gamma-1) - - # s = specific entropy, eq. (53) - s = gamma - V1 + (V2^2 + V3^2)/(2*V5) - - # eq. (52) - rho_iota = ((gamma-1) / (-V5)^gamma)^(equations.inv_gamma_minus_one)*exp(-s * equations.inv_gamma_minus_one) - - # eq. (51) - rho = -rho_iota * V5 - rho_v1 = rho_iota * V2 - rho_v2 = rho_iota * V3 - rho_e = rho_iota * (1-(V2^2 + V3^2)/(2*V5)) - return SVector(rho, rho_v1, rho_v2, rho_e) + # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + @unpack gamma = equations + + # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) + # instead of `-rho * s / (gamma - 1)` + V1, V2, V3, V5 = w .* (gamma - 1) + + # s = specific entropy, eq. (53) + s = gamma - V1 + (V2^2 + V3^2) / (2 * V5) + + # eq. (52) + rho_iota = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * + exp(-s * equations.inv_gamma_minus_one) + + # eq. (51) + rho = -rho_iota * V5 + rho_v1 = rho_iota * V2 + rho_v2 = rho_iota * V3 + rho_e = rho_iota * (1 - (V2^2 + V3^2) / (2 * V5)) + return SVector(rho, rho_v1, rho_v2, rho_e) end - - - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerEquations2D) - rho, v1, v2, p = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1 + rho_v2 * v2) - return SVector(rho, rho_v1, rho_v2, rho_e) + rho, v1, v2, p = prim + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1 + rho_v2 * v2) + return SVector(rho, rho_v1, rho_v2, rho_e) end - @inline function density(u, equations::CompressibleEulerEquations2D) - rho = u[1] - return rho + rho = u[1] + return rho end - @inline function pressure(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) - return p + rho, rho_v1, rho_v2, rho_e = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) + return p end - @inline function density_pressure(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) - return rho_times_p + rho, rho_v1, rho_v2, rho_e = u + rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) + return rho_times_p end - # Calculates the entropy flux in direction "orientation" and the entropy variables for a state cons # NOTE: This method seems to work currently (b82534e) but is never used anywhere. Thus it is # commented here until someone uses it or writes a test for it. 
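The entropy-variable maps above are mutual inverses; as a quick, hedged sanity check (assuming Trixi.jl is loaded and `cons2entropy`/`entropy2cons` are exported, with an illustrative state):

```julia
using Trixi, StaticArrays

equations = CompressibleEulerEquations2D(1.4)
u = prim2cons(SVector(1.1, 0.2, -0.3, 2.0), equations)

w = cons2entropy(u, equations)
entropy2cons(w, equations) ≈ u  # round trip holds up to roundoff
```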
@@ -1394,47 +1410,42 @@ end # return entropy, entropy_flux # end - # Calculate thermodynamic entropy for a conservative state `cons` @inline function entropy_thermodynamic(cons, equations::CompressibleEulerEquations2D) - # Pressure - p = (equations.gamma - 1) * (cons[4] - 1/2 * (cons[2]^2 + cons[3]^2) / cons[1]) + # Pressure + p = (equations.gamma - 1) * (cons[4] - 1 / 2 * (cons[2]^2 + cons[3]^2) / cons[1]) - # Thermodynamic entropy - s = log(p) - equations.gamma*log(cons[1]) + # Thermodynamic entropy + s = log(p) - equations.gamma * log(cons[1]) - return s + return s end - # Calculate mathematical entropy for a conservative state `cons` @inline function entropy_math(cons, equations::CompressibleEulerEquations2D) - # Mathematical entropy - S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one + # Mathematical entropy + S = -entropy_thermodynamic(cons, equations) * cons[1] * + equations.inv_gamma_minus_one - return S + return S end - # Default entropy is the mathematical entropy -@inline entropy(cons, equations::CompressibleEulerEquations2D) = entropy_math(cons, equations) - +@inline function entropy(cons, equations::CompressibleEulerEquations2D) + entropy_math(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline energy_total(cons, ::CompressibleEulerEquations2D) = cons[4] - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - return (rho_v1^2 + rho_v2^2) / (2 * rho) + rho, rho_v1, rho_v2, rho_e = u + return (rho_v1^2 + rho_v2^2) / (2 * rho) end - # Calculate internal energy for a conservative state `cons` @inline function energy_internal(cons, equations::CompressibleEulerEquations2D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - - end # @muladd diff --git a/src/equations/compressible_euler_3d.jl b/src/equations/compressible_euler_3d.jl index c56b7114669..c16a454b176 100644 --- a/src/equations/compressible_euler_3d.jl +++ b/src/equations/compressible_euler_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerEquations3D(gamma) @@ -42,20 +42,23 @@ p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho (v_1^2+v_2^2+v_3^2) \right) ``` the pressure. 
""" -struct CompressibleEulerEquations3D{RealT<:Real} <: AbstractCompressibleEulerEquations{3, 5} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function CompressibleEulerEquations3D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct CompressibleEulerEquations3D{RealT <: Real} <: + AbstractCompressibleEulerEquations{3, 5} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function CompressibleEulerEquations3D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end - -varnames(::typeof(cons2cons), ::CompressibleEulerEquations3D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e") -varnames(::typeof(cons2prim), ::CompressibleEulerEquations3D) = ("rho", "v1", "v2", "v3", "p") - +function varnames(::typeof(cons2cons), ::CompressibleEulerEquations3D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e") +end +function varnames(::typeof(cons2prim), ::CompressibleEulerEquations3D) + ("rho", "v1", "v2", "v3", "p") +end # Set initial conditions at physical location `x` for time `t` """ @@ -64,36 +67,36 @@ varnames(::typeof(cons2prim), ::CompressibleEulerEquations3D) = ("rho", "v1", "v A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::CompressibleEulerEquations3D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = 0.7 - rho_e = 10.0 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = 0.7 + rho_e = 10.0 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end - """ initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations3D) A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations3D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - ini = c + A * sin(ω * (x[1] + x[2] + x[3] - t)) - - rho = ini - rho_v1 = ini - rho_v2 = ini - rho_v3 = ini - rho_e = ini^2 - - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerEquations3D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + ini = c + A * sin(ω * (x[1] + x[2] + x[3] - t)) + + rho = ini + rho_v1 = ini + rho_v2 = ini + rho_v3 = ini + rho_e = ini^2 + + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end """ @@ -102,33 +105,33 @@ end Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerEquations3D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - γ = equations.gamma - - x1, x2, x3 = x - si, co = sincos(ω * (x1 + x2 + x3 - t)) - rho = c + A * si - rho_x = ω * A * co - # Note that d/dt rho = -d/dx rho = -d/dy rho = - d/dz rho. 
- - tmp = (2 * rho - 1.5) * (γ - 1) - - du1 = 2 * rho_x - du2 = rho_x * (2 + tmp) - du3 = du2 - du4 = du2 - du5 = rho_x * (4 * rho + 3 * tmp) - - return SVector(du1, du2, du3, du4, du5) +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerEquations3D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + γ = equations.gamma + + x1, x2, x3 = x + si, co = sincos(ω * (x1 + x2 + x3 - t)) + rho = c + A * si + rho_x = ω * A * co + # Note that d/dt rho = -d/dx rho = -d/dy rho = - d/dz rho. + + tmp = (2 * rho - 1.5) * (γ - 1) + + du1 = 2 * rho_x + du2 = rho_x * (2 + tmp) + du3 = du2 + du4 = du2 + du5 = rho_x * (4 * rho + 3 * tmp) + + return SVector(du1, du2, du3, du4, du5) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations3D) @@ -137,28 +140,28 @@ A weak blast wave taken from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations3D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Set up spherical coordinates - inicenter = (0, 0, 0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - z_norm = x[3] - inicenter[3] - r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) - phi = atan(y_norm, x_norm) - theta = iszero(r) ? 0.0 : acos(z_norm / r) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) - v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) - v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, v3, p), equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerEquations3D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Set up spherical coordinates + inicenter = (0, 0, 0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + z_norm = x[3] - inicenter[3] + r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) + phi = atan(y_norm, x_norm) + theta = iszero(r) ? 0.0 : acos(z_norm / r) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) + v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) + v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations3D) @@ -169,23 +172,24 @@ Setup used for convergence tests of the Euler equations with self-gravity used i in combination with [`source_terms_eoc_test_coupled_euler_gravity`](@ref) or [`source_terms_eoc_test_euler`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations3D) - # OBS! this assumes that γ = 2 other manufactured source terms are incorrect - if equations.gamma != 2.0 - error("adiabatic constant must be 2 for the coupling convergence test") - end - c = 2.0 - A = 0.1 - ini = c + A * sin(pi * (x[1] + x[2] + x[3] - t)) - G = 1.0 # gravitational constant - - rho = ini - v1 = 1.0 - v2 = 1.0 - v3 = 1.0 - p = ini^2 * G * 2 / (3 * pi) # "3" is the number of spatial dimensions - - return prim2cons(SVector(rho, v1, v2, v3, p), equations) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::CompressibleEulerEquations3D) + # OBS! 
this assumes that γ = 2 other manufactured source terms are incorrect + if equations.gamma != 2.0 + error("adiabatic constant must be 2 for the coupling convergence test") + end + c = 2.0 + A = 0.1 + ini = c + A * sin(pi * (x[1] + x[2] + x[3] - t)) + G = 1.0 # gravitational constant + + rho = ini + v1 = 1.0 + v2 = 1.0 + v3 = 1.0 + p = ini^2 * G * 2 / (3 * pi) # "3" is the number of spatial dimensions + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) end """ @@ -197,27 +201,28 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). """ -@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, equations::CompressibleEulerEquations3D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 # gravitational constant, must match coupling solver - C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi - - x1, x2, x3 = x - # TODO: sincospi - si, co = sincos(pi * (x1 + x2 + x3 - t)) - rhox = A * pi * co - rho = c + A * si - - # In "2 * rhox", the "2" is "number of spatial dimensions minus one" - du1 = 2 * rhox - du2 = 2 * rhox - du3 = 2 * rhox - du4 = 2 * rhox - du5 = 2 * rhox * (3/2 - C_grav*rho) # "3" in "3/2" is the number of spatial dimensions - - return SVector(du1, du2, du3, du4, du5) +@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, + equations::CompressibleEulerEquations3D) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 # gravitational constant, must match coupling solver + C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi + + x1, x2, x3 = x + # TODO: sincospi + si, co = sincos(pi * (x1 + x2 + x3 - t)) + rhox = A * pi * co + rho = c + A * si + + # In "2 * rhox", the "2" is "number of spatial dimensions minus one" + du1 = 2 * rhox + du2 = 2 * rhox + du3 = 2 * rhox + du4 = 2 * rhox + du5 = 2 * rhox * (3 / 2 - C_grav * rho) # "3" in "3/2" is the number of spatial dimensions + + return SVector(du1, du2, du3, du4, du5) end """ @@ -235,28 +240,27 @@ in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). [`source_terms_eoc_test_coupled_euler_gravity`](@ref) instead. 
""" function source_terms_eoc_test_euler(u, x, t, equations::CompressibleEulerEquations3D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 - C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions - - x1, x2, x3 = x - # TODO: sincospi - si, co = sincos(pi * (x1 + x2 + x3 - t)) - rhox = A * pi * co - rho = c + A * si - - du1 = rhox * 2 - du2 = rhox * (2 - C_grav * rho) - du3 = rhox * (2 - C_grav * rho) - du4 = rhox * (2 - C_grav * rho) - du5 = rhox * (3 - 5 * C_grav * rho) - - return SVector(du1, du2, du3, du4, du5) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 + C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions + + x1, x2, x3 = x + # TODO: sincospi + si, co = sincos(pi * (x1 + x2 + x3 - t)) + rhox = A * pi * co + rho = c + A * si + + du1 = rhox * 2 + du2 = rhox * (2 - C_grav * rho) + du3 = rhox * (2 - C_grav * rho) + du4 = rhox * (2 - C_grav * rho) + du5 = rhox * (3 - 5 * C_grav * rho) + + return SVector(du1, du2, du3, du4, du5) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::CompressibleEulerEquations3D) @@ -281,46 +285,50 @@ Details about the 1D pressure Riemann solution can be found in Section 6.3.3 of x, t, surface_flux_function, equations::CompressibleEulerEquations3D) + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal = normal_direction / norm_ + + # Some vector that can't be identical to normal_vector (unless normal_vector == 0) + tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) + # Orthogonal projection + tangent1 -= dot(normal, tangent1) * normal + tangent1 = normalize(tangent1) + + # Third orthogonal vector + tangent2 = normalize(cross(normal_direction, tangent1)) + + # rotate the internal solution state + u_local = rotate_to_x(u_inner, normal, tangent1, tangent2, equations) + + # compute the primitive variables + rho_local, v_normal, v_tangent1, v_tangent2, p_local = cons2prim(u_local, equations) + + # Get the solution of the pressure Riemann problem + # See Section 6.3.3 of + # Eleuterio F. 
Toro (2009) + # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction + # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) + if v_normal <= 0.0 + sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed + p_star = p_local * + (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * + equations.gamma * + equations.inv_gamma_minus_one) + else # v_normal > 0.0 + A = 2 / ((equations.gamma + 1) * rho_local) + B = p_local * (equations.gamma - 1) / (equations.gamma + 1) + p_star = p_local + + 0.5 * v_normal / A * + (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) + end - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal = normal_direction / norm_ - - # Some vector that can't be identical to normal_vector (unless normal_vector == 0) - tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) - # Orthogonal projection - tangent1 -= dot(normal, tangent1) * normal - tangent1 = normalize(tangent1) - - # Third orthogonal vector - tangent2 = normalize(cross(normal_direction, tangent1)) - - # rotate the internal solution state - u_local = rotate_to_x(u_inner, normal, tangent1, tangent2, equations) - - # compute the primitive variables - rho_local, v_normal, v_tangent1, v_tangent2, p_local = cons2prim(u_local, equations) - - # Get the solution of the pressure Riemann problem - # See Section 6.3.3 of - # Eleuterio F. Toro (2009) - # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction - # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) - if v_normal <= 0.0 - sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed - p_star = p_local * (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * equations.gamma * equations.inv_gamma_minus_one) - else # v_normal > 0.0 - A = 2 / ((equations.gamma + 1) * rho_local) - B = p_local * (equations.gamma - 1) / (equations.gamma + 1) - p_star = p_local + 0.5 * v_normal / A * (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) - end - - # For the slip wall we directly set the flux as the normal velocity is zero - return SVector(zero(eltype(u_inner)), - p_star * normal[1], - p_star * normal[2], - p_star * normal[3], - zero(eltype(u_inner))) * norm_ + # For the slip wall we directly set the flux as the normal velocity is zero + return SVector(zero(eltype(u_inner)), + p_star * normal[1], + p_star * normal[2], + p_star * normal[3], + zero(eltype(u_inner))) * norm_ end """ @@ -333,18 +341,18 @@ Should be used together with [`TreeMesh`](@ref). 
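For orientation, the 1D pressure Riemann solution evaluated inside the slip-wall flux above can be reproduced standalone. A minimal sketch with made-up local values (`gamma`, `rho_local`, `p_local`, `v_normal` are illustrative inputs, not taken from the source):

```julia
# Toro's wall-pressure solution (Sec. 6.3.3): rarefaction for v_normal <= 0,
# shock for v_normal > 0, matching the branches in boundary_condition_slip_wall.
gamma = 1.4
rho_local, p_local, v_normal = 1.0, 1.0, -0.1

sound_speed = sqrt(gamma * p_local / rho_local)
p_star = if v_normal <= 0.0
    p_local *
    (1 + 0.5 * (gamma - 1) * v_normal / sound_speed)^(2 * gamma / (gamma - 1))
else
    A = 2 / ((gamma + 1) * rho_local)
    B = p_local * (gamma - 1) / (gamma + 1)
    p_local +
    0.5 * v_normal / A * (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B)))
end
```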
direction, x, t, surface_flux_function, equations::CompressibleEulerEquations3D) - # get the appropriate normal vector from the orientation - if orientation == 1 - normal_direction = SVector(1.0, 0.0, 0.0) - elseif orientation == 2 - normal_direction = SVector(0.0, 1.0, 0.0) - else # orientation == 3 - normal_direction = SVector(0.0, 0.0, 1.0) - end - - # compute and return the flux using `boundary_condition_slip_wall` routine above - return boundary_condition_slip_wall(u_inner, normal_direction, direction, - x, t, surface_flux_function, equations) + # get the appropriate normal vector from the orientation + if orientation == 1 + normal_direction = SVector(1.0, 0.0, 0.0) + elseif orientation == 2 + normal_direction = SVector(0.0, 1.0, 0.0) + else # orientation == 3 + normal_direction = SVector(0.0, 0.0, 1.0) + end + + # compute and return the flux using `boundary_condition_slip_wall` routine above + return boundary_condition_slip_wall(u_inner, normal_direction, direction, + x, t, surface_flux_function, equations) end """ @@ -357,63 +365,66 @@ Should be used together with [`StructuredMesh`](@ref). direction, x, t, surface_flux_function, equations::CompressibleEulerEquations3D) - # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back - # to be inward pointing on the -x, -y, and -z sides due to the orientation convention used by StructuredMesh - if isodd(direction) - boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, - x, t, surface_flux_function, equations) - else - boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, - x, t, surface_flux_function, equations) - end - - return boundary_flux + # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back + # to be inward pointing on the -x, -y, and -z sides due to the orientation convention used by StructuredMesh + if isodd(direction) + boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, + x, t, surface_flux_function, + equations) + else + boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, + x, t, surface_flux_function, + equations) + end + + return boundary_flux end # Calculate 1D flux for a single point @inline function flux(u, orientation::Integer, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1 * v1 + p - f3 = rho_v1 * v2 - f4 = rho_v1 * v3 - f5 = (rho_e + p) * v1 - elseif orientation == 2 - f1 = rho_v2 - f2 = rho_v2 * v1 - f3 = rho_v2 * v2 + p - f4 = rho_v2 * v3 - f5 = (rho_e + p) * v2 - else - f1 = rho_v3 - f2 = rho_v3 * v1 - f3 = rho_v3 * v2 - f4 = rho_v3 * v3 + p - f5 = (rho_e + p) * v3 - end - return SVector(f1, f2, f3, f4, f5) + rho, rho_v1, rho_v2, rho_v3, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + f3 = rho_v1 * v2 + f4 = rho_v1 * v3 + f5 = (rho_e + p) * v1 + elseif orientation == 2 + f1 = rho_v2 + f2 = rho_v2 * v1 + f3 = rho_v2 * v2 + p + f4 = rho_v2 * v3 + f5 = (rho_e + p) * v2 + else + f1 = rho_v3 + f2 = rho_v3 * v1 + f3 = rho_v3 * v2 + f4 = rho_v3 * v3 + p + f5 = (rho_e + p) * v3 + end + return SVector(f1, f2, f3, f4, f5) end -@inline function flux(u, 
normal::AbstractVector, equations::CompressibleEulerEquations3D) - rho_e = last(u) - rho, v1, v2, v3, p = cons2prim(u, equations) - - v_normal = v1 * normal[1] + v2 * normal[2] + v3 * normal[3] - rho_v_normal = rho * v_normal - f1 = rho_v_normal - f2 = rho_v_normal * v1 + p * normal[1] - f3 = rho_v_normal * v2 + p * normal[2] - f4 = rho_v_normal * v3 + p * normal[3] - f5 = (rho_e + p) * v_normal - return SVector(f1, f2, f3, f4, f5) +@inline function flux(u, normal::AbstractVector, + equations::CompressibleEulerEquations3D) + rho_e = last(u) + rho, v1, v2, v3, p = cons2prim(u, equations) + + v_normal = v1 * normal[1] + v2 * normal[2] + v3 * normal[3] + rho_v_normal = rho * v_normal + f1 = rho_v_normal + f2 = rho_v_normal * v1 + p * normal[1] + f3 = rho_v_normal * v2 + p * normal[2] + f4 = rho_v_normal * v3 + p * normal[3] + f5 = (rho_e + p) * v_normal + return SVector(f1, f2, f3, f4, f5) end - """ flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerEquations3D) @@ -430,73 +441,77 @@ The modification is in the energy flux to guarantee pressure equilibrium and was compressible flows [DOI: 10.1016/j.jcp.2020.110060](https://doi.org/10.1016/j.jcp.2020.110060) """ -@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - v3_avg = 1/2 * ( v3_ll + v3_rr) - p_avg = 1/2 * ( p_ll + p_rr) - kin_avg = 1/2 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - pv1_avg = 1/2 * (p_ll*v1_rr + p_rr*v1_ll) - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = p_avg*v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - elseif orientation == 2 - pv2_avg = 1/2 * (p_ll*v2_rr + p_rr*v2_ll) - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - else - pv3_avg = 1/2 * (p_ll*v3_rr + p_rr*v3_ll) - f1 = rho_avg * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = p_avg*v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + v3_avg = 1 / 2 * (v3_ll + v3_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + kin_avg = 1 / 2 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + pv1_avg = 1 / 2 * (p_ll * v1_rr + p_rr * v1_ll) + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + elseif orientation == 2 + pv2_avg = 1 / 2 * (p_ll * v2_rr + p_rr * v2_ll) + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * 
v3_avg + f5 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + else + pv3_avg = 1 / 2 * (p_ll * v3_rr + p_rr * v3_ll) + f1 = rho_avg * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = p_avg * v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg + end -@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - v3_avg = 1/2 * ( v3_ll + v3_rr) - v_dot_n_avg = 1/2 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 1/2 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * v3_avg + p_avg * normal_direction[3] - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end +@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + v3_avg = 1 / 2 * (v3_ll + v3_rr) + v_dot_n_avg = 1 / 2 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * v3_avg + p_avg * normal_direction[3] + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4, f5) +end """ flux_kennedy_gruber(u_ll, u_rr, orientation_or_normal_direction, @@ -508,79 +523,83 @@ Kinetic energy preserving two-point flux by Navier-Stokes equations for a compressible fluid [DOI: 10.1016/j.jcp.2007.09.020](https://doi.org/10.1016/j.jcp.2007.09.020) """ -@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - 
v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = (rho_avg * e_avg + p_avg) * v1_avg - elseif orientation == 2 - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = (rho_avg * e_avg + p_avg) * v2_avg - else - f1 = rho_avg * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = (rho_avg * e_avg + p_avg) * v3_avg - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = (rho_avg * e_avg + p_avg) * v1_avg + elseif orientation == 2 + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = (rho_avg * e_avg + p_avg) * v2_avg + else + f1 = rho_avg * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = (rho_avg * e_avg + p_avg) * v3_avg + end -@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - - # Average each factor of products in flux - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] + v3_avg * normal_direction[3] - p_avg = 0.5 * ((equations.gamma - 1) * (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) + - (equations.gamma - 1) * (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2))) - e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * v3_avg + p_avg * normal_direction[3] - f5 = f1 * e_avg + p_avg * v_dot_n_avg - - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end +@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / 
rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + + # Average each factor of products in flux + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] + + v3_avg * normal_direction[3] + p_avg = 0.5 * ((equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) + + (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2))) + e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * v3_avg + p_avg * normal_direction[3] + f5 = f1 * e_avg + p_avg * v_dot_n_avg + + return SVector(f1, f2, f3, f4, f5) +end """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerEquations3D) @@ -591,51 +610,54 @@ Entropy conserving two-point flux by for Compressible Euler and Navier-Stokes Equations [DOI: 10.4208/cicp.170712.010313a](https://doi.org/10.4208/cicp.170712.010313a) """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2 + v3_ll^2) - specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2 + v3_rr^2) - - # Compute the necessary mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_mean = 0.5 * rho_avg / beta_avg - velocity_square_avg = specific_kin_ll + specific_kin_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_mean - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+ f2*v1_avg + f3*v2_avg + f4*v3_avg - elseif orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_mean - f4 = f1 * v3_avg - f5 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+ f2*v1_avg + f3*v2_avg + f4*v3_avg - else - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_mean - f5 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+ f2*v1_avg + f3*v2_avg + f4*v3_avg - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2 + v3_ll^2) + specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2 + v3_rr^2) + + # Compute the necessary mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + 
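The `rho_mean` and `beta_mean` values above use the logarithmic mean `ln_mean(a, b) = (b - a) / (log(b) - log(a))`, which Trixi evaluates via a series expansion for nearly equal arguments to avoid the 0/0 limit. A quick numerical sanity check (the values are arbitrary):

```julia
using Trixi

Trixi.ln_mean(1.0, 2.0)          # ≈ 1 / log(2) ≈ 1.4427
Trixi.ln_mean(1.0, 1.0 + 1e-14)  # ≈ 1.0, finite and accurate near a == b
```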
v3_avg = 0.5 * (v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + velocity_square_avg = specific_kin_ll + specific_kin_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + f4 * v3_avg + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_mean + f4 = f1 * v3_avg + f5 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + f4 * v3_avg + else + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_mean + f5 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + f4 * v3_avg + end + return SVector(f1, f2, f3, f4, f5) +end """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, @@ -652,79 +674,89 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - elseif orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - else # orientation == 3 - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v3_rr + p_rr*v3_ll) - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 
0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + else # orientation == 3 + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v3_rr + p_rr * v3_ll) + end -@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * v3_avg + p_avg * normal_direction[3] - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end +@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = 
f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * v3_avg + p_avg * normal_direction[3] + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4, f5) +end """ splitting_steger_warming(u, orientation::Integer, @@ -752,146 +784,153 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. """ @inline function splitting_steger_warming(u, orientation::Integer, equations::CompressibleEulerEquations3D) - fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) - fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) + fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_steger_warming(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) - f3p = rho_2gamma * alpha_p * v2 - f4p = rho_2gamma * alpha_p * v3 - f5p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - elseif orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * alpha_p * v1 - f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) - f4p = rho_2gamma * alpha_p * v3 - f5p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - else # orientation == 3 - lambda1 = v3 - lambda2 = v3 + a - lambda3 = v3 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * alpha_p * v1 - f3p = rho_2gamma * alpha_p * v2 - f4p = rho_2gamma * (alpha_p * v3 + a * (lambda2_p - lambda3_p)) - f5p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - end - return SVector(f1p, f2p, f3p, f4p, f5p) + rho, rho_v1, rho_v2, rho_v3, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + a = 
sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) + f3p = rho_2gamma * alpha_p * v2 + f4p = rho_2gamma * alpha_p * v3 + f5p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + elseif orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * alpha_p * v1 + f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) + f4p = rho_2gamma * alpha_p * v3 + f5p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + else # orientation == 3 + lambda1 = v3 + lambda2 = v3 + a + lambda3 = v3 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * alpha_p * v1 + f3p = rho_2gamma * alpha_p * v2 + f4p = rho_2gamma * (alpha_p * v3 + a * (lambda2_p - lambda3_p)) + f5p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + end + return SVector(f1p, f2p, f3p, f4p, f5p) end @inline function splitting_steger_warming(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) - f3m = rho_2gamma * alpha_m * v2 - f4m = rho_2gamma * alpha_m * v3 - f5m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - elseif orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * 
rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * alpha_m * v1 - f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m - lambda3_m)) - f4m = rho_2gamma * alpha_m * v3 - f5m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - else # orientation == 3 - lambda1 = v3 - lambda2 = v3 + a - lambda3 = v3 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * alpha_m * v1 - f3m = rho_2gamma * alpha_m * v2 - f4m = rho_2gamma * (alpha_m * v3 + a * (lambda2_m - lambda3_m)) - f5m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - end - return SVector(f1m, f2m, f3m, f4m, f5m) + rho, rho_v1, rho_v2, rho_v3, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + a = sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) + f3m = rho_2gamma * alpha_m * v2 + f4m = rho_2gamma * alpha_m * v3 + f5m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + elseif orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * alpha_m * v1 + f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m - lambda3_m)) + f4m = rho_2gamma * alpha_m * v3 + f5m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + else # orientation == 3 + lambda1 = v3 + lambda2 = v3 + a + lambda3 = v3 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * alpha_m * v1 + f3m = rho_2gamma * alpha_m * v2 + f4m = rho_2gamma * (alpha_m * v3 + a * (lambda2_m - lambda3_m)) + f5m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + end + return SVector(f1m, f2m, f3m, f4m, f5m) end - """ FluxLMARS(c)(u_ll, u_rr, orientation_or_normal_direction, 
equations::CompressibleEulerEquations3D) @@ -906,197 +945,204 @@ References: [DOI: 10.1175/MWR-D-12-00129.1](https://doi.org/10.1175/mwr-d-12-00129.1) """ struct FluxLMARS{SpeedOfSound} - # Estimate for the speed of sound - speed_of_sound::SpeedOfSound + # Estimate for the speed of sound + speed_of_sound::SpeedOfSound end -@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - c = flux_lmars.speed_of_sound - - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - if orientation == 1 - v_ll = v1_ll - v_rr = v1_rr - elseif orientation == 2 - v_ll = v2_ll - v_rr = v2_rr - else # orientation == 3 - v_ll = v3_ll - v_rr = v3_rr - end - - rho = 0.5 * (rho_ll + rho_rr) - p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) - v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) - - if v >= 0 - f1, f2, f3, f4, f5 = v * u_ll - else - f1, f2, f3, f4, f5 = v * u_rr - end - - if orientation == 1 - f2 += p - elseif orientation == 2 - f3 += p - else # orientation == 3 - f4 += p - end - f5 += p * v - - return SVector(f1, f2, f3, f4, f5) +@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + c = flux_lmars.speed_of_sound + + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + if orientation == 1 + v_ll = v1_ll + v_rr = v1_rr + elseif orientation == 2 + v_ll = v2_ll + v_rr = v2_rr + else # orientation == 3 + v_ll = v3_ll + v_rr = v3_rr + end + + rho = 0.5 * (rho_ll + rho_rr) + p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) + v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) + + if v >= 0 + f1, f2, f3, f4, f5 = v * u_ll + else + f1, f2, f3, f4, f5 = v * u_rr + end + + if orientation == 1 + f2 += p + elseif orientation == 2 + f3 += p + else # orientation == 3 + f4 += p + end + f5 += p * v + + return SVector(f1, f2, f3, f4, f5) end -@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - c = flux_lmars.speed_of_sound +@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + c = flux_lmars.speed_of_sound - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] + v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] - # Note that this is the same as computing v_ll and v_rr with a normalized normal vector - # and then multiplying v by `norm_` again, but this version is slightly faster. - norm_ = norm(normal_direction) + # Note that this is the same as computing v_ll and v_rr with a normalized normal vector + # and then multiplying v by `norm_` again, but this version is slightly faster. 
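In practice, the `FluxLMARS` functor defined above is constructed with an estimate of the speed of sound and passed as a surface flux. A minimal sketch (the polynomial degree and the value 340.0 are arbitrary choices, not taken from this patch):

```julia
using Trixi

equations = CompressibleEulerEquations3D(1.4)
solver = DGSEM(polydeg = 3, surface_flux = FluxLMARS(340.0),
               volume_integral = VolumeIntegralFluxDifferencing(flux_kennedy_gruber))
```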
+ norm_ = norm(normal_direction) - rho = 0.5 * (rho_ll + rho_rr) - p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) / norm_ - v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) * norm_ + rho = 0.5 * (rho_ll + rho_rr) + p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) / norm_ + v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) * norm_ - if v >= 0 - f1, f2, f3, f4, f5 = v * u_ll - else - f1, f2, f3, f4, f5 = v * u_rr - end + if v >= 0 + f1, f2, f3, f4, f5 = v * u_ll + else + f1, f2, f3, f4, f5 = v * u_rr + end - f2 += p * normal_direction[1] - f3 += p * normal_direction[2] - f4 += p * normal_direction[3] - f5 += p * v + f2 += p * normal_direction[1] + f3 += p * normal_direction[2] + f4 += p * normal_direction[3] + f5 += p * v - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Get the velocity value in the appropriate direction - if orientation == 1 - v_ll = v1_ll - v_rr = v1_rr - elseif orientation == 2 - v_ll = v2_ll - v_rr = v2_rr - else # orientation == 3 - v_ll = v3_ll - v_rr = v3_rr - end - # Calculate sound speeds - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) -end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Get the velocity value in the appropriate direction + if orientation == 1 + v_ll = v1_ll + v_rr = v1_rr + elseif orientation == 2 + v_ll = v2_ll + v_rr = v2_rr + else # orientation == 3 + v_ll = v3_ll + v_rr = v3_rr + end + # Calculate sound speeds + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Calculate normal velocities and sound speed - # left - v_ll = ( v1_ll * normal_direction[1] - + v2_ll * normal_direction[2] - + v3_ll * normal_direction[3] ) - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - # right - v_rr = ( v1_rr * normal_direction[1] - + v2_rr * normal_direction[2] - + v3_rr * normal_direction[3] ) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) end +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Calculate normal velocities and sound speed + # left + v_ll = (v1_ll * normal_direction[1] + + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3]) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + # right + v_rr = (v1_rr * normal_direction[1] + + v2_rr * normal_direction[2] + + 
v3_rr * normal_direction[3]) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) +end # Calculate minimum and maximum wave speeds for HLL-type fluxes -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - if orientation == 1 # x-direction - λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) - elseif orientation == 2 # y-direction - λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) - else # z-direction - λ_min = v3_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v3_rr + sqrt(equations.gamma * p_rr / rho_rr) - end - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + if orientation == 1 # x-direction + λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) + elseif orientation == 2 # y-direction + λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) + else # z-direction + λ_min = v3_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v3_rr + sqrt(equations.gamma * p_rr / rho_rr) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] + v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] - norm_ = norm(normal_direction) - # The v_normals are already scaled by the norm - λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ - λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ + norm_ = norm(normal_direction) + # The v_normals are already scaled by the norm + λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ + λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ - return λ_min, λ_max + return λ_min, λ_max end - # Rotate normal vector to x-axis; normal, tangent1 and tangent2 need to be orthonormal # Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions # has been normalized prior to this rotation of the state vector -@inline function rotate_to_x(u, normal_vector, tangent1, tangent2, equations::CompressibleEulerEquations3D) - # Multiply with [ 1 0 0 0 0; - # 0 ― normal_vector ― 0; - # 0 ― tangent1 ― 0; - # 0 ― tangent2 ― 0; - # 0 0 0 0 1 ] - return SVector(u[1], - normal_vector[1] * u[2] + normal_vector[2] * u[3] + normal_vector[3] * u[4], - tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] 
* u[4], - tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4], - u[5]) +@inline function rotate_to_x(u, normal_vector, tangent1, tangent2, + equations::CompressibleEulerEquations3D) + # Multiply with [ 1 0 0 0 0; + # 0 ― normal_vector ― 0; + # 0 ― tangent1 ― 0; + # 0 ― tangent2 ― 0; + # 0 0 0 0 1 ] + return SVector(u[1], + normal_vector[1] * u[2] + normal_vector[2] * u[3] + + normal_vector[3] * u[4], + tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] * u[4], + tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4], + u[5]) end - # Rotate x-axis to normal vector; normal, tangent1 and tangent2 need to be orthonormal # Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions # has been normalized prior to this back-rotation of the state vector -@inline function rotate_from_x(u, normal_vector, tangent1, tangent2, equations::CompressibleEulerEquations3D) - # Multiply with [ 1 0 0 0 0; - # 0 | | | 0; - # 0 normal_vector tangent1 tangent2 0; - # 0 | | | 0; - # 0 0 0 0 1 ] - return SVector(u[1], - normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4], - normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4], - normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4], - u[5]) +@inline function rotate_from_x(u, normal_vector, tangent1, tangent2, + equations::CompressibleEulerEquations3D) + # Multiply with [ 1 0 0 0 0; + # 0 | | | 0; + # 0 normal_vector tangent1 tangent2 0; + # 0 | | | 0; + # 0 0 0 0 1 ] + return SVector(u[1], + normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4], + normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4], + normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4], + u[5]) end - """ flux_hllc(u_ll, u_rr, orientation, equations::CompressibleEulerEquations3D) @@ -1104,124 +1150,129 @@ Computes the HLLC flux (HLL with Contact) for compressible Euler equations devel [Lecture slides](http://www.prague-sum.com/download/2012/Toro_2-HLLC-RiemannSolver.pdf) Signal speeds: [DOI: 10.1137/S1064827593260140](https://doi.org/10.1137/S1064827593260140) """ -function flux_hllc(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Calculate primitive variables and speed of sound - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - e_ll = rho_e_ll / rho_ll - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) - c_ll = sqrt(equations.gamma*p_ll/rho_ll) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - e_rr = rho_e_rr / rho_rr - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2)) - c_rr = sqrt(equations.gamma*p_rr/rho_rr) - - # Obtain left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr - if orientation == 1 # x-direction - vel_L = v1_ll - vel_R = v1_rr - ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 - elseif orientation == 2 # y-direction - vel_L = v2_ll - vel_R = v2_rr - ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 - else # z-direction - vel_L = v3_ll - vel_R = v3_rr - ekin_roe = (sqrt_rho_ll * 
v1_ll + sqrt_rho_rr * v1_rr)^2 + (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 - end - vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho - ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) - H_ll = (rho_e_ll + p_ll) / rho_ll - H_rr = (rho_e_rr + p_rr) / rho_rr - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) - Ssl = min(vel_L - c_ll, vel_roe - c_roe) - Ssr = max(vel_R + c_rr, vel_roe + c_roe) - sMu_L = Ssl - vel_L - sMu_R = Ssr - vel_R - - if Ssl >= 0.0 - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - f5 = f_ll[5] - elseif Ssr <= 0.0 - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - f5 = f_rr[5] - else - SStar = (p_rr - p_ll + rho_ll*vel_L*sMu_L - rho_rr*vel_R*sMu_R) / (rho_ll*sMu_L - rho_rr*sMu_R) - if Ssl <= 0.0 <= SStar - densStar = rho_ll*sMu_L / (Ssl-SStar) - enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) - UStar1 = densStar - UStar5 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_ll - UStar4 = densStar*v3_ll - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_ll - UStar3 = densStar*SStar - UStar4 = densStar*v3_ll - else # z-direction - UStar2 = densStar*v1_ll - UStar3 = densStar*v2_ll - UStar4 = densStar*SStar - end - f1 = f_ll[1]+Ssl*(UStar1 - rho_ll) - f2 = f_ll[2]+Ssl*(UStar2 - rho_v1_ll) - f3 = f_ll[3]+Ssl*(UStar3 - rho_v2_ll) - f4 = f_ll[4]+Ssl*(UStar4 - rho_v3_ll) - f5 = f_ll[5]+Ssl*(UStar5 - rho_e_ll) +function flux_hllc(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Calculate primitive variables and speed of sound + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + e_ll = rho_e_ll / rho_ll + p_ll = (equations.gamma - 1) * + (rho_e_ll - 1 / 2 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + e_rr = rho_e_rr / rho_rr + p_rr = (equations.gamma - 1) * + (rho_e_rr - 1 / 2 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2)) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Obtain left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr + if orientation == 1 # x-direction + vel_L = v1_ll + vel_R = v1_rr + ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 + elseif orientation == 2 # y-direction + vel_L = v2_ll + vel_R = v2_rr + ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 + else # z-direction + vel_L = v3_ll + vel_R = v3_rr + ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + + (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + end + vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho + ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) + H_ll = (rho_e_ll + p_ll) / rho_ll + H_rr = (rho_e_rr + p_rr) / rho_rr + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) + Ssl = min(vel_L - c_ll, vel_roe - c_roe) + Ssr = max(vel_R + c_rr, vel_roe + c_roe) + sMu_L = Ssl - vel_L + 
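A useful sanity check on the signal speeds and star states computed here: for identical left and right states the star region collapses, so `flux_hllc` must reduce to the physical flux. A small sketch (the state is arbitrary; `SVector` is assumed to be re-exported by Trixi):

```julia
using Trixi

equations = CompressibleEulerEquations3D(1.4)
u = prim2cons(SVector(1.0, 0.1, -0.2, 0.3, 2.5), equations)

all(flux_hllc(u, u, orientation, equations) ≈ flux(u, orientation, equations)
    for orientation in 1:3)  # expected to be true
```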
sMu_R = Ssr - vel_R + + if Ssl >= 0.0 + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + f5 = f_ll[5] + elseif Ssr <= 0.0 + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] + f5 = f_rr[5] else - densStar = rho_rr*sMu_R / (Ssr-SStar) - enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) - UStar1 = densStar - UStar5 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_rr - UStar4 = densStar*v3_rr - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_rr - UStar3 = densStar*SStar - UStar4 = densStar*v3_rr - else # z-direction - UStar2 = densStar*v1_rr - UStar3 = densStar*v2_rr - UStar4 = densStar*SStar - end - f1 = f_rr[1]+Ssr*(UStar1 - rho_rr) - f2 = f_rr[2]+Ssr*(UStar2 - rho_v1_rr) - f3 = f_rr[3]+Ssr*(UStar3 - rho_v2_rr) - f4 = f_rr[4]+Ssr*(UStar4 - rho_v3_rr) - f5 = f_rr[5]+Ssr*(UStar5 - rho_e_rr) + SStar = (p_rr - p_ll + rho_ll * vel_L * sMu_L - rho_rr * vel_R * sMu_R) / + (rho_ll * sMu_L - rho_rr * sMu_R) + if Ssl <= 0.0 <= SStar + densStar = rho_ll * sMu_L / (Ssl - SStar) + enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) + UStar1 = densStar + UStar5 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_ll + UStar4 = densStar * v3_ll + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_ll + UStar3 = densStar * SStar + UStar4 = densStar * v3_ll + else # z-direction + UStar2 = densStar * v1_ll + UStar3 = densStar * v2_ll + UStar4 = densStar * SStar + end + f1 = f_ll[1] + Ssl * (UStar1 - rho_ll) + f2 = f_ll[2] + Ssl * (UStar2 - rho_v1_ll) + f3 = f_ll[3] + Ssl * (UStar3 - rho_v2_ll) + f4 = f_ll[4] + Ssl * (UStar4 - rho_v3_ll) + f5 = f_ll[5] + Ssl * (UStar5 - rho_e_ll) + else + densStar = rho_rr * sMu_R / (Ssr - SStar) + enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) + UStar1 = densStar + UStar5 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_rr + UStar4 = densStar * v3_rr + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_rr + UStar3 = densStar * SStar + UStar4 = densStar * v3_rr + else # z-direction + UStar2 = densStar * v1_rr + UStar3 = densStar * v2_rr + UStar4 = densStar * SStar + end + f1 = f_rr[1] + Ssr * (UStar1 - rho_rr) + f2 = f_rr[2] + Ssr * (UStar2 - rho_v1_rr) + f3 = f_rr[3] + Ssr * (UStar3 - rho_v2_rr) + f4 = f_rr[4] + Ssr * (UStar4 - rho_v3_rr) + f5 = f_rr[5] + Ssr * (UStar5 - rho_e_rr) + end end - end - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end - """ flux_hlle(u_ll, u_rr, orientation, equations::CompressibleEulerEquations3D) @@ -1237,221 +1288,220 @@ of the numerical flux. On Godunov-type methods near low densities. 
[DOI: 10.1016/0021-9991(91)90211-3](https://doi.org/10.1016/0021-9991(91)90211-3) """ -function flux_hlle(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Calculate primitive variables, enthalpy and speed of sound - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # `u_ll[5]` is total energy `rho_e_ll` on the left - H_ll = (u_ll[5] + p_ll) / rho_ll - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - - # `u_rr[5]` is total energy `rho_e_rr` on the right - H_rr = (u_rr[5] + p_rr) / rho_rr - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) - - v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho - v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho - v3_roe = (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr) * inv_sum_sqrt_rho - v_roe_mag = v1_roe^2 + v2_roe^2 + v3_roe^2 - - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) - - # Compute convenience constant for positivity preservation, see - # https://doi.org/10.1016/0021-9991(91)90211-3 - beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) - - # Estimate the edges of the Riemann fan (with positivity conservation) - if orientation == 1 # x-direction - SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) - SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) - elseif orientation == 2 # y-direction - SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) - SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) - else # z-direction - SsL = min(v3_roe - c_roe, v3_ll - beta * c_ll, zero(v3_roe)) - SsR = max(v3_roe + c_roe, v3_rr + beta * c_rr, zero(v3_roe)) - end - - if SsL >= 0.0 && SsR > 0.0 - # Positive supersonic speed - f_ll = flux(u_ll, orientation, equations) - - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - f5 = f_ll[5] - elseif SsR <= 0.0 && SsL < 0.0 - # Negative supersonic speed - f_rr = flux(u_rr, orientation, equations) - - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - f5 = f_rr[5] - else - # Subsonic case - # Compute left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) +function flux_hlle(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Calculate primitive variables, enthalpy and speed of sound + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # `u_ll[5]` is total energy `rho_e_ll` on the left + H_ll = (u_ll[5] + p_ll) / rho_ll + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + # `u_rr[5]` is total energy `rho_e_rr` on the right + H_rr = (u_rr[5] + p_rr) / rho_rr + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) + + v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho + v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho + v3_roe = (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr) * inv_sum_sqrt_rho + v_roe_mag = v1_roe^2 + v2_roe^2 + v3_roe^2 + + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) + + 
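For reference, the Roe averages computed above use the usual square-root-of-density weighting. A standalone sketch (the helper `roe_avg` is hypothetical, introduced only for illustration):

```julia
# Square-root-of-density weighted (Roe) average of a quantity phi
roe_avg(phi_ll, phi_rr, rho_ll, rho_rr) =
    (sqrt(rho_ll) * phi_ll + sqrt(rho_rr) * phi_rr) / (sqrt(rho_ll) + sqrt(rho_rr))

roe_avg(0.1, 0.2, 1.0, 4.0)  # weights 1:2, yields 0.5 / 3 ≈ 0.1667
```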
# Compute convenience constant for positivity preservation, see + # https://doi.org/10.1016/0021-9991(91)90211-3 + beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) + + # Estimate the edges of the Riemann fan (with positivity conservation) + if orientation == 1 # x-direction + SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) + SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) + elseif orientation == 2 # y-direction + SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) + SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) + else # z-direction + SsL = min(v3_roe - c_roe, v3_ll - beta * c_ll, zero(v3_roe)) + SsR = max(v3_roe + c_roe, v3_rr + beta * c_rr, zero(v3_roe)) + end - f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / (SsR - SsL) - f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / (SsR - SsL) - f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / (SsR - SsL) - f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / (SsR - SsL) - f5 = (SsR * f_ll[5] - SsL * f_rr[5] + SsL * SsR * (u_rr[5] - u_ll[5])) / (SsR - SsL) - end + if SsL >= 0.0 && SsR > 0.0 + # Positive supersonic speed + f_ll = flux(u_ll, orientation, equations) + + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + f5 = f_ll[5] + elseif SsR <= 0.0 && SsL < 0.0 + # Negative supersonic speed + f_rr = flux(u_rr, orientation, equations) + + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] + f5 = f_rr[5] + else + # Subsonic case + # Compute left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / + (SsR - SsL) + f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / + (SsR - SsL) + f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / + (SsR - SsL) + f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / + (SsR - SsL) + f5 = (SsR * f_ll[5] - SsL * f_rr[5] + SsL * SsR * (u_rr[5] - u_ll[5])) / + (SsR - SsL) + end - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end - @inline function max_abs_speeds(u, equations::CompressibleEulerEquations3D) - rho, v1, v2, v3, p = cons2prim(u, equations) - c = sqrt(equations.gamma * p / rho) + rho, v1, v2, v3, p = cons2prim(u, equations) + c = sqrt(equations.gamma * p / rho) - return abs(v1) + c, abs(v2) + c, abs(v3) + c + return abs(v1) + c, abs(v2) + c, abs(v3) + c end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u + rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - return SVector(rho, v1, v2, v3, p) + return SVector(rho, v1, v2, v3, p) end # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) * 
equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = rho_p * v3 - w5 = -rho_p - - return SVector(w1, w2, w3, w4, w5) + rho, rho_v1, rho_v2, rho_v3, rho_e = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p + + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = rho_p * v3 + w5 = -rho_p + + return SVector(w1, w2, w3, w4, w5) end @inline function entropy2cons(w, equations::CompressibleEulerEquations3D) - # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - @unpack gamma = equations - - # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) - # instead of `-rho * s / (gamma - 1)` - V1, V2, V3, V4, V5 = w .* (gamma-1) - - # s = specific entropy, eq. (53) - V_square = V2^2 + V3^2 + V4^2 - s = gamma - V1 + V_square/(2*V5) - - # eq. (52) - rho_iota = ((gamma-1) / (-V5)^gamma)^(equations.inv_gamma_minus_one)*exp(-s * equations.inv_gamma_minus_one) - - # eq. (51) - rho = -rho_iota * V5 - rho_v1 = rho_iota * V2 - rho_v2 = rho_iota * V3 - rho_v3 = rho_iota * V4 - rho_e = rho_iota*(1-V_square/(2*V5)) - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) + # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + @unpack gamma = equations + + # convert to entropy `-rho * s` used by Hughes, Franca, Mallet (1986) + # instead of `-rho * s / (gamma - 1)` + V1, V2, V3, V4, V5 = w .* (gamma - 1) + + # s = specific entropy, eq. (53) + V_square = V2^2 + V3^2 + V4^2 + s = gamma - V1 + V_square / (2 * V5) + + # eq. (52) + rho_iota = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * + exp(-s * equations.inv_gamma_minus_one) + + # eq.
(51) + rho = -rho_iota * V5 + rho_v1 = rho_iota * V2 + rho_v2 = rho_iota * V3 + rho_v3 = rho_iota * V4 + rho_e = rho_iota * (1 - V_square / (2 * V5)) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerEquations3D) - rho, v1, v2, v3, p = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) + rho, v1, v2, v3, p = prim + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p * equations.inv_gamma_minus_one + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end - @inline function density(u, equations::CompressibleEulerEquations3D) - rho = u[1] - return rho + rho = u[1] + return rho end - @inline function pressure(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) + return p end - @inline function density_pressure(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2)) - return rho_times_p + rho, rho_v1, rho_v2, rho_v3, rho_e = u + rho_times_p = (equations.gamma - 1) * + (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2)) + return rho_times_p end - # Calculate thermodynamic entropy for a conservative state `u` @inline function entropy_thermodynamic(u, equations::CompressibleEulerEquations3D) - rho, _ = u - p = pressure(u, equations) + rho, _ = u + p = pressure(u, equations) - # Thermodynamic entropy - s = log(p) - equations.gamma * log(rho) + # Thermodynamic entropy + s = log(p) - equations.gamma * log(rho) - return s + return s end - # Calculate mathematical entropy for a conservative state `cons` @inline function entropy_math(cons, equations::CompressibleEulerEquations3D) - S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one - # Mathematical entropy + S = -entropy_thermodynamic(cons, equations) * cons[1] * + equations.inv_gamma_minus_one + # Mathematical entropy - return S + return S end - # Default entropy is the mathematical entropy -@inline entropy(cons, equations::CompressibleEulerEquations3D) = entropy_math(cons, equations) - +@inline function entropy(cons, equations::CompressibleEulerEquations3D) + entropy_math(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline energy_total(cons, ::CompressibleEulerEquations3D) = cons[5] - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, _ = u - return 0.5 * (rho_v1^2 + rho_v2^2 +rho_v3^2) / rho + rho, rho_v1, rho_v2, rho_v3, _ = u + return 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho end - # Calculate internal energy for a conservative state `cons` @inline function energy_internal(cons, equations::CompressibleEulerEquations3D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - - end # @muladd diff --git a/src/equations/compressible_euler_multicomponent_1d.jl 
b/src/equations/compressible_euler_multicomponent_1d.jl index c5a3579ab3e..4a50d60471a 100644 --- a/src/equations/compressible_euler_multicomponent_1d.jl +++ b/src/equations/compressible_euler_multicomponent_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerMulticomponentEquations1D(; gammas, gas_constants) @@ -47,59 +47,74 @@ In case of more than one component, the specific heat ratios `gammas` and the ga The remaining variables like the specific heats at constant volume 'cv' or the specific heats at constant pressure 'cp' are then calculated considering a calorically perfect gas. """ -struct CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT<:Real} <: AbstractCompressibleEulerMulticomponentEquations{1, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - - function CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants ::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - - new(gammas, gas_constants, cv, cp) - end +struct CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT <: Real} <: + AbstractCompressibleEulerMulticomponentEquations{1, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + + function CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + + new(gammas, gas_constants, cv, cp) + end end - function CompressibleEulerMulticomponentEquations1D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants), + typeof(gas_constants[1] / (gammas[1] - 1))) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants), typeof(gas_constants[1] / (gammas[1] - 1))) + NVARS = length(_gammas) + 2 + NCOMP = length(_gammas) - NVARS = length(_gammas) + 2 - NCOMP = length(_gammas) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) - - return CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end - -@inline Base.real(::CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT - - -function varnames(::typeof(cons2cons), equations::CompressibleEulerMulticomponentEquations1D) - - cons = ("rho_v1", "rho_e") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) 
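A minimal usage sketch of the keyword constructor reformatted above (the gamma and gas-constant values are illustrative only; assumes Trixi is loaded):

    using Trixi
    # two components => NCOMP = 2 and NVARS = NCOMP + 2 = 4 in 1D
    equations = CompressibleEulerMulticomponentEquations1D(gammas = (1.4, 1.648),
                                                           gas_constants = (0.287, 0.1846))
    # the constructor precomputes cv = R / (gamma - 1) and cp = cv + R per component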
+@inline function Base.real(::CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, + RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT end - -function varnames(::typeof(cons2prim), equations::CompressibleEulerMulticomponentEquations1D) - - prim = ("v1", "p") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) +function varnames(::typeof(cons2cons), + equations::CompressibleEulerMulticomponentEquations1D) + cons = ("rho_v1", "rho_e") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) end +function varnames(::typeof(cons2prim), + equations::CompressibleEulerMulticomponentEquations1D) + prim = ("v1", "p") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) +end # Set initial conditions at physical location `x` for time `t` @@ -110,27 +125,32 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerMulticomponentEquations1D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f - ini = c + A * sin(omega * (x[1] - t)) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerMulticomponentEquations1D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + omega = 2 * pi * f + ini = c + A * sin(omega * (x[1] - t)) - v1 = 1.0 + v1 = 1.0 - rho = ini + rho = ini - # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) + # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) - prim1 = rho * v1 - prim2 = rho^2 + prim1 = rho * v1 + prim2 = rho^2 - prim_other = SVector{2, real(equations)}(prim1, prim2) + prim_other = SVector{2, real(equations)}(prim1, prim2) - return vcat(prim_other, prim_rho) + return vcat(prim_other, prim_rho) end """ @@ -140,32 +160,33 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerMulticomponentEquations1D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerMulticomponentEquations1D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + omega = 2 * pi * f - gamma = totalgamma(u, equations) + gamma = totalgamma(u, equations) - x1, = x - si, co = sincos((t - x1)*omega) - tmp = (-((4 * si * A - 4c) + 1) * (gamma - 1) * co * A * omega) / 2 + x1, = x + si, co = sincos((t - x1) * omega) + tmp = (-((4 * si * A - 4c) + 1) * (gamma - 1) * co * A * omega) / 2 - # Here we compute an arbitrary number of different rhos. 
(one rho is double the next rho while the sum of all rhos is 1 - du_rho = SVector{ncomponents(equations), real(equations)}(0.0 for i in eachcomponent(equations)) + # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) + du_rho = SVector{ncomponents(equations), real(equations)}(0.0 + for i in eachcomponent(equations)) - du1 = tmp - du2 = tmp + du1 = tmp + du2 = tmp - du_other = SVector{2, real(equations)}(du1, du2) + du_other = SVector{2, real(equations)}(du1, du2) - return vcat(du_other, du_rho) + return vcat(du_other, du_rho) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations1D) @@ -174,44 +195,53 @@ A for multicomponent adapted weak blast wave adapted to multicomponent and taken A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations1D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - inicenter = SVector(0.0) - x_norm = x[1] - inicenter[1] - r = abs(x_norm) - cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm) - - prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.0 : 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.1691 for i in eachcomponent(equations)) - - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - p = r > 0.5 ? 1.0 : 1.245 - - prim_other = SVector{2, real(equations)}(v1, p) - - return prim2cons(vcat(prim_other, prim_rho), equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerMulticomponentEquations1D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + inicenter = SVector(0.0) + x_norm = x[1] - inicenter[1] + r = abs(x_norm) + cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm) + + prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? + 2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + 1.0 : + 2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + 1.1691 + for i in eachcomponent(equations)) + + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + p = r > 0.5 ?
1.0 : 1.245 + + prim_other = SVector{2, real(equations)}(v1, p) + + return prim2cons(vcat(prim_other, prim_rho), equations) end - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u +@inline function flux(u, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + rho_v1, rho_e = u - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1/rho - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * v1^2) + v1 = rho_v1 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * v1^2) - f_rho = densities(u, v1, equations) - f1 = rho_v1 * v1 + p - f2 = (rho_e + p) * v1 + f_rho = densities(u, v1, equations) + f1 = rho_v1 * v1 + p + f2 = (rho_e + p) * v1 - f_other = SVector{2, real(equations)}(f1, f2) + f_other = SVector{2, real(equations)}(f1, f2) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerMulticomponentEquations1D) @@ -220,61 +250,66 @@ Entropy conserving two-point flux by "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations"" arXiv:1904.00972v3 [math.NA] 4 Feb 2020 """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_e_ll = u_ll - rho_v1_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+2], u_rr[i+2]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+2] + u_rr[i+2]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - # extract velocities - v1_ll = rho_v1_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - v1_square = 0.5 * (v1_ll^2 + v1_rr^2) - v_sum = v1_avg - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+2] * cv[i] - help1_rr += u_rr[i+2] * cv[i] - end - - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation - help1 = zero(T_ll) - help2 = zero(T_rr) - - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] - end - f1 = (help2) * v1_avg + enth/T - f2 = (help1)/T_log - 0.5 * (v1_square) * (help2) + v1_avg * f1 - - f_other = SVector{2, real(equations)}(f1, f2) - - return vcat(f_other, f_rho) +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + # Unpack left and right state + @unpack gammas, gas_constants, cv = equations + rho_v1_ll, rho_e_ll = u_ll + rho_v1_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 2], + u_rr[i + 2]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * 
(u_ll[i + 2] + + u_rr[i + 2]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + v1_square = 0.5 * (v1_ll^2 + v1_rr^2) + v_sum = v1_avg + + enth = zero(v_sum) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + + for i in eachcomponent(equations) + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 2] * cv[i] + help1_rr += u_rr[i + 2] * cv[i] + end + + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) + + # Calculate fluxes depending on orientation + help1 = zero(T_ll) + help2 = zero(T_rr) + + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = (help2) * v1_avg + enth / T + f2 = (help1) / T_log - 0.5 * (v1_square) * (help2) + v1_avg * f1 + + f_other = SVector{2, real(equations)}(f1, f2) + + return vcat(f_other, f_rho) end - """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerMulticomponentEquations1D) @@ -290,170 +325,180 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_e_ll = u_ll - rho_v1_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+2], u_rr[i+2]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+2] + u_rr[i+2]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculating gamma - gamma = totalgamma(0.5*(u_ll+u_rr), equations) - inv_gamma_minus_one = 1/(gamma-1) - - # extract velocities - v1_ll = rho_v1_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr) - - # density flux - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - - # helpful variables - f_rho_sum = zero(v1_ll) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - enth_ll = zero(v1_ll) - enth_rr = zero(v1_rr) - for i in eachcomponent(equations) - enth_ll += u_ll[i+2] * gas_constants[i] - enth_rr += u_rr[i+2] * gas_constants[i] - f_rho_sum += f_rho[i] - help1_ll += u_ll[i+2] * cv[i] - help1_rr += u_rr[i+2] * cv[i] - end - - # temperature and pressure - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr - p_ll = T_ll * enth_ll - p_rr = T_rr * enth_rr - p_avg = 0.5 * (p_ll + p_rr) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - - # momentum and energy flux - f1 = f_rho_sum * v1_avg + p_avg - f2 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - f_other = SVector{2, 
real(equations)}(f1, f2) - - return vcat(f_other, f_rho) +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + # Unpack left and right state + @unpack gammas, gas_constants, cv = equations + rho_v1_ll, rho_e_ll = u_ll + rho_v1_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 2], + u_rr[i + 2]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 2] + + u_rr[i + 2]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculating gamma + gamma = totalgamma(0.5 * (u_ll + u_rr), equations) + inv_gamma_minus_one = 1 / (gamma - 1) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr) + + # density flux + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + + # helpful variables + f_rho_sum = zero(v1_ll) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + enth_ll = zero(v1_ll) + enth_rr = zero(v1_rr) + for i in eachcomponent(equations) + enth_ll += u_ll[i + 2] * gas_constants[i] + enth_rr += u_rr[i + 2] * gas_constants[i] + f_rho_sum += f_rho[i] + help1_ll += u_ll[i + 2] * cv[i] + help1_rr += u_rr[i + 2] * cv[i] + end + + # temperature and pressure + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr + p_ll = T_ll * enth_ll + p_rr = T_rr * enth_rr + p_avg = 0.5 * (p_ll + p_rr) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + + # momentum and energy flux + f1 = f_rho_sum * v1_avg + p_avg + f2 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + f_other = SVector{2, real(equations)}(f1, f2) + + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1_ll, rho_e_ll = u_ll - rho_v1_rr, rho_e_rr = u_rr - - # Calculate primitive variables and speed of sound - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - - p_ll = (gamma_ll - 1) * (rho_e_ll - 1/2 * rho_ll * v_ll^2) - p_rr = (gamma_rr - 1) * (rho_e_rr - 1/2 * rho_rr * v_rr^2) - c_ll = sqrt(gamma_ll * p_ll / rho_ll) - c_rr = sqrt(gamma_rr * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + rho_v1_ll, rho_e_ll = u_ll + rho_v1_rr, rho_e_rr = u_rr + + # Calculate primitive variables and speed of sound + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + + p_ll = (gamma_ll - 1) * (rho_e_ll - 1 / 2 * rho_ll * v_ll^2) + p_rr = (gamma_rr - 1) * (rho_e_rr - 1 / 2 * rho_rr * v_rr^2) + c_ll = sqrt(gamma_ll * p_ll / rho_ll) + c_rr = sqrt(gamma_rr * p_rr / rho_rr) + + λ_max = max(abs(v_ll), abs(v_rr)) + 
max(c_ll, c_rr) end +@inline function max_abs_speeds(u, + equations::CompressibleEulerMulticomponentEquations1D) + rho_v1, rho_e = u -@inline function max_abs_speeds(u, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u + rho = density(u, equations) + v1 = rho_v1 / rho - rho = density(u, equations) - v1 = rho_v1 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 1 / 2 * rho * (v1^2)) + c = sqrt(gamma * p / rho) - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 1/2 * rho * (v1^2)) - c = sqrt(gamma * p / rho) - - return (abs(v1) + c, ) + return (abs(v1) + c,) end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u + rho_v1, rho_e = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+2] for i in eachcomponent(equations)) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 2] + for i in eachcomponent(equations)) - rho = density(u, equations) - v1 = rho_v1 / rho - gamma = totalgamma(u, equations) + rho = density(u, equations) + v1 = rho_v1 / rho + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2)) - prim_other = SVector{2, real(equations)}(v1, p) + p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2)) + prim_other = SVector{2, real(equations)}(v1, p) - return vcat(prim_other, prim_rho) + return vcat(prim_other, prim_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerMulticomponentEquations1D) - @unpack cv, gammas = equations - v1, p = prim + @unpack cv, gammas = equations + v1, p = prim - RealT = eltype(prim) + RealT = eltype(prim) - cons_rho = SVector{ncomponents(equations), RealT}(prim[i+2] for i in eachcomponent(equations)) - rho = density(prim, equations) - gamma = totalgamma(prim, equations) + cons_rho = SVector{ncomponents(equations), RealT}(prim[i + 2] + for i in eachcomponent(equations)) + rho = density(prim, equations) + gamma = totalgamma(prim, equations) - rho_v1 = rho * v1 + rho_v1 = rho * v1 - rho_e = p/(gamma-1) + 0.5 * (rho_v1 * v1) + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1) - cons_other = SVector{2, RealT}(rho_v1, rho_e) + cons_other = SVector{2, RealT}(rho_v1, rho_e) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerMulticomponentEquations1D) - @unpack cv, gammas, gas_constants = equations - rho_v1, rho_e = u - - rho = density(u, equations) - - help1 = zero(rho) - gas_constant = zero(rho) - for i in eachcomponent(equations) - help1 += u[i+2] * cv[i] - gas_constant += gas_constants[i] * (u[i+2]/rho) - end - - v1 = rho_v1 / rho - v_square = v1^2 - gamma = totalgamma(u, equations) - - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - log(gas_constant) - rho_p = rho / p - T = (rho_e - 0.5 * rho * v_square) / (help1) - entrop_rho = SVector{ncomponents(equations), real(equations)}( gas_constant * ((gamma - s)/(gamma - 1.0) - (0.5 * v_square * rho_p)) for i in eachcomponent(equations)) - - w1 = gas_constant * v1 * rho_p - w2 = gas_constant * (-1.0 * rho_p) - - entrop_other = SVector{2, real(equations)}(w1, w2) - - return vcat(entrop_other, entrop_rho) + @unpack cv, gammas, gas_constants = equations + rho_v1, rho_e = u + + rho = density(u, equations) + + help1 = zero(rho) + gas_constant = zero(rho) + for i in eachcomponent(equations) + help1 
+= u[i + 2] * cv[i] + gas_constant += gas_constants[i] * (u[i + 2] / rho) + end + + v1 = rho_v1 / rho + v_square = v1^2 + gamma = totalgamma(u, equations) + + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) - log(gas_constant) + rho_p = rho / p + T = (rho_e - 0.5 * rho * v_square) / (help1) + entrop_rho = SVector{ncomponents(equations), real(equations)}(gas_constant * + ((gamma - s) / + (gamma - 1.0) - + (0.5 * v_square * + rho_p)) + for i in eachcomponent(equations)) + + w1 = gas_constant * v1 * rho_p + w2 = gas_constant * (-1.0 * rho_p) + + entrop_other = SVector{2, real(equations)}(w1, w2) + + return vcat(entrop_other, entrop_rho) end - """ totalgamma(u, equations::CompressibleEulerMulticomponentEquations1D) @@ -461,47 +506,42 @@ Function that calculates the total gamma out of all partial gammas using the partial density fractions as well as the partial specific heats at constant volume. """ @inline function totalgamma(u, equations::CompressibleEulerMulticomponentEquations1D) - @unpack cv, gammas = equations + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+2] * cv[i] * gammas[i] - help2 += u[i+2] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 2] * cv[i] * gammas[i] + help2 += u[i + 2] * cv[i] + end - return help1/help2 + return help1 / help2 end - @inline function pressure(u, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u + rho_v1, rho_e = u - rho = density(u, equations) - gamma = totalgamma(u, equations) + rho = density(u, equations) + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2)/rho) + p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2) / rho) - return p + return p end - @inline function density(u, equations::CompressibleEulerMulticomponentEquations1D) - rho = zero(u[1]) - - for i in eachcomponent(equations) - rho += u[i+2] - end + rho = zero(u[1]) - return rho - end - - - @inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations1D) - - return SVector{ncomponents(equations), real(equations)}(u[i+2]*v for i in eachcomponent(equations)) - end + for i in eachcomponent(equations) + rho += u[i + 2] + end + return rho +end +@inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations1D) + return SVector{ncomponents(equations), real(equations)}(u[i + 2] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/compressible_euler_multicomponent_2d.jl b/src/equations/compressible_euler_multicomponent_2d.jl index bb91cfbcb4e..5a015777cb1 100644 --- a/src/equations/compressible_euler_multicomponent_2d.jl +++ b/src/equations/compressible_euler_multicomponent_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerMulticomponentEquations2D(; gammas, gas_constants) @@ -51,59 +51,74 @@ In case of more than one component, the specific heat ratios `gammas` and the ga The remaining variables like the specific heats at constant volume 'cv' or the specific heats at constant pressure 'cp' are then calculated considering a calorically perfect gas. 
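For example (illustrative values for a two-component mixture):

    CompressibleEulerMulticomponentEquations2D(gammas = (1.4, 1.648),
                                               gas_constants = (0.287, 0.1846))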
""" -struct CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT<:Real} <: AbstractCompressibleEulerMulticomponentEquations{2, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - - function CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - - new(gammas, gas_constants,cv, cp) - end +struct CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT <: Real} <: + AbstractCompressibleEulerMulticomponentEquations{2, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + + function CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + + new(gammas, gas_constants, cv, cp) + end end - function CompressibleEulerMulticomponentEquations2D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants), + typeof(gas_constants[1] / (gammas[1] - 1))) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants), typeof(gas_constants[1] / (gammas[1] - 1))) - - NVARS = length(_gammas) + 3 - NCOMP = length(_gammas) + NVARS = length(_gammas) + 3 + NCOMP = length(_gammas) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - return CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end - -@inline Base.real(::CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT - - -function varnames(::typeof(cons2cons), equations::CompressibleEulerMulticomponentEquations2D) - - cons = ("rho_v1", "rho_v2", "rho_e") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) +@inline function Base.real(::CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, + RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT end - -function varnames(::typeof(cons2prim), equations::CompressibleEulerMulticomponentEquations2D) - - prim = ("v1", "v2", "p") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) +function varnames(::typeof(cons2cons), + equations::CompressibleEulerMulticomponentEquations2D) + cons = ("rho_v1", "rho_v2", "rho_e") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) 
end +function varnames(::typeof(cons2prim), + equations::CompressibleEulerMulticomponentEquations2D) + prim = ("v1", "v2", "p") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) +end # Set initial conditions at physical location `x` for time `t` @@ -114,29 +129,34 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerMulticomponentEquations2D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f - ini = c + A * sin(omega * (x[1] + x[2] - t)) - - v1 = 1.0 - v2 = 1.0 - - rho = ini - - # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) - - prim1 = rho * v1 - prim2 = rho * v2 - prim3 = rho^2 - - prim_other = SVector{3, real(equations)}(prim1, prim2, prim3) - - return vcat(prim_other, prim_rho) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerMulticomponentEquations2D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + omega = 2 * pi * f + ini = c + A * sin(omega * (x[1] + x[2] - t)) + + v1 = 1.0 + v2 = 1.0 + + rho = ini + + # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) + + prim1 = rho * v1 + prim2 = rho * v2 + prim3 = rho^2 + + prim_other = SVector{3, real(equations)}(prim1, prim2, prim3) + + return vcat(prim_other, prim_rho) end """ @@ -146,38 +166,42 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerMulticomponentEquations2D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f - - gamma = totalgamma(u, equations) - - x1, x2 = x - si, co = sincos((x1 + x2 - t)*omega) - tmp1 = co * A * omega - tmp2 = si * A - tmp3 = gamma - 1 - tmp4 = (2*c - 1)*tmp3 - tmp5 = (2*tmp2*gamma - 2*tmp2 + tmp4 + 1)*tmp1 - tmp6 = tmp2 + c - - # Here we compute an arbitrary number of different rhos. 
(one rho is double the next rho while the sum of all rhos is 1 - du_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * tmp1 for i in eachcomponent(equations)) - - du1 = tmp5 - du2 = tmp5 - du3 = 2*((tmp6 - 1.0)*tmp3 + tmp6*gamma)*tmp1 - - du_other = SVector{3, real(equations)}(du1, du2, du3) - - return vcat(du_other, du_rho) +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerMulticomponentEquations2D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + omega = 2 * pi * f + + gamma = totalgamma(u, equations) + + x1, x2 = x + si, co = sincos((x1 + x2 - t) * omega) + tmp1 = co * A * omega + tmp2 = si * A + tmp3 = gamma - 1 + tmp4 = (2 * c - 1) * tmp3 + tmp5 = (2 * tmp2 * gamma - 2 * tmp2 + tmp4 + 1) * tmp1 + tmp6 = tmp2 + c + + # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) + du_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + tmp1 + for i in eachcomponent(equations)) + + du1 = tmp5 + du2 = tmp5 + du3 = 2 * ((tmp6 - 1.0) * tmp3 + tmp6 * gamma) * tmp1 + + du_other = SVector{3, real(equations)}(du1, du2, du3) + + return vcat(du_other, du_rho) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations2D) @@ -186,56 +210,65 @@ A for multicomponent adapted weak blast wave taken from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations2D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Set up polar coordinates - inicenter = SVector(0.0, 0.0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - sin_phi, cos_phi = sincos(phi) - - prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.0 : 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.1691 for i in eachcomponent(equations)) - - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi - p = r > 0.5 ? 1.0 : 1.245 - - prim_other = SVector{3, real(equations)}(v1, v2, p) - - return prim2cons(vcat(prim_other, prim_rho),equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerMulticomponentEquations2D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Set up polar coordinates + inicenter = SVector(0.0, 0.0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + sin_phi, cos_phi = sincos(phi) + + prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? + 2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + 1.0 : + 2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + 1.1691 + for i in eachcomponent(equations)) + + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi + p = r > 0.5 ?
1.0 : 1.245 + + prim_other = SVector{3, real(equations)}(v1, v2, p) + + return prim2cons(vcat(prim_other, prim_rho), equations) end - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u - - rho = density(u, equations) - - v1 = rho_v1/rho - v2 = rho_v2/rho - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) - - if orientation == 1 - f_rho = densities(u, v1, equations) - f1 = rho_v1 * v1 + p - f2 = rho_v2 * v1 - f3 = (rho_e + p) * v1 - else - f_rho = densities(u, v2, equations) - f1 = rho_v1 * v2 - f2 = rho_v2 * v2 + p - f3 = (rho_e + p) * v2 - end - - f_other = SVector{3, real(equations)}(f1, f2, f3) - - return vcat(f_other, f_rho) -end +@inline function flux(u, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1, rho_v2, rho_e = u + + rho = density(u, equations) + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) + + if orientation == 1 + f_rho = densities(u, v1, equations) + f1 = rho_v1 * v1 + p + f2 = rho_v2 * v1 + f3 = (rho_e + p) * v1 + else + f_rho = densities(u, v2, equations) + f1 = rho_v1 * v2 + f2 = rho_v2 * v2 + p + f3 = (rho_e + p) * v2 + end + + f_other = SVector{3, real(equations)}(f1, f2, f3) + return vcat(f_other, f_rho) +end """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerMulticomponentEquations2D) @@ -245,72 +278,80 @@ Adaption of the entropy conserving two-point flux by "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations"" arXiv:1904.00972v3 [math.NA] 4 Feb 2020 """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+3], u_rr[i+3]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+3] + u_rr[i+3]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # extract velocities - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v1_square = 0.5 * (v1_ll^2 + v1_rr^2) - v2_square = 0.5 * (v2_ll^2 + v2_rr^2) - v_sum = v1_avg + v2_avg - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+3] * cv[i] - help1_rr += u_rr[i+3] * cv[i] - end - - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation - help1 = zero(T_ll) - help2 = zero(T_rr) - if orientation == 1 - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + # Unpack left and right state + @unpack gammas, 
gas_constants, cv = equations + rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 3], + u_rr[i + 3]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 3] + + u_rr[i + 3]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v1_square = 0.5 * (v1_ll^2 + v1_rr^2) + v2_square = 0.5 * (v2_ll^2 + v2_rr^2) + v_sum = v1_avg + v2_avg + + enth = zero(v_sum) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 3] * cv[i] + help1_rr += u_rr[i + 3] * cv[i] end - f1 = (help2) * v1_avg + enth/T - f2 = (help2) * v2_avg - f3 = (help1)/T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + v2_avg * f2 - else - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v2_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] + + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) + + # Calculate fluxes depending on orientation + help1 = zero(T_ll) + help2 = zero(T_rr) + if orientation == 1 + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = (help2) * v1_avg + enth / T + f2 = (help2) * v2_avg + f3 = (help1) / T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + + v2_avg * f2 + else + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = (help2) * v1_avg + f2 = (help2) * v2_avg + enth / T + f3 = (help1) / T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + + v2_avg * f2 end - f1 = (help2) * v1_avg - f2 = (help2) * v2_avg + enth/T - f3 = (help1)/T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + v2_avg * f2 - end - f_other = SVector{3, real(equations)}(f1, f2, f3) + f_other = SVector{3, real(equations)}(f1, f2, f3) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerMulticomponentEquations2D) @@ -326,194 +367,206 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+3], u_rr[i+3]) for i in eachcomponent(equations)) - rhok_avg = 
SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+3] + u_rr[i+3]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculating gamma - gamma = totalgamma(0.5*(u_ll+u_rr), equations) - inv_gamma_minus_one = 1/(gamma-1) - - # extract velocities - v1_ll = rho_v1_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_ll = rho_v2_ll / rho_ll - v2_rr = rho_v2_rr / rho_rr - v2_avg = 0.5 * (v2_ll + v2_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # helpful variables - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - enth_ll = zero(v1_ll) - enth_rr = zero(v1_rr) - for i in eachcomponent(equations) - enth_ll += u_ll[i+3] * gas_constants[i] - enth_rr += u_rr[i+3] * gas_constants[i] - help1_ll += u_ll[i+3] * cv[i] - help1_rr += u_rr[i+3] * cv[i] - end - - # temperature and pressure - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr - p_ll = T_ll * enth_ll - p_rr = T_rr * enth_rr - p_avg = 0.5 * (p_ll + p_rr) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - - f_rho_sum = zero(T_rr) - if orientation == 1 - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + # Unpack left and right state + @unpack gammas, gas_constants, cv = equations + rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 3], + u_rr[i + 3]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 3] + + u_rr[i + 3]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculating gamma + gamma = totalgamma(0.5 * (u_ll + u_rr), equations) + inv_gamma_minus_one = 1 / (gamma - 1) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_ll = rho_v2_ll / rho_ll + v2_rr = rho_v2_rr / rho_rr + v2_avg = 0.5 * (v2_ll + v2_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # helpful variables + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + enth_ll = zero(v1_ll) + enth_rr = zero(v1_rr) for i in eachcomponent(equations) - f_rho_sum += f_rho[i] + enth_ll += u_ll[i + 3] * gas_constants[i] + enth_rr += u_rr[i + 3] * gas_constants[i] + help1_ll += u_ll[i + 3] * cv[i] + help1_rr += u_rr[i + 3] * cv[i] end - f1 = f_rho_sum * v1_avg + p_avg - f2 = f_rho_sum * v2_avg - f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - else - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v2_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - f_rho_sum += f_rho[i] + + # temperature and pressure + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr + p_ll = T_ll * enth_ll + p_rr = T_rr * enth_rr + p_avg = 0.5 * (p_ll + p_rr) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + + f_rho_sum = zero(T_rr) + if orientation == 1 + f_rho = 
SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f_rho_sum += f_rho[i] + end + f1 = f_rho_sum * v1_avg + p_avg + f2 = f_rho_sum * v2_avg + f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + else + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f_rho_sum += f_rho[i] + end + f1 = f_rho_sum * v1_avg + f2 = f_rho_sum * v2_avg + p_avg + f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) end - f1 = f_rho_sum * v1_avg - f2 = f_rho_sum * v2_avg + p_avg - f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - end - # momentum and energy flux - f_other = SVector{3, real(equations)}(f1, f2, f3) + # momentum and energy flux + f_other = SVector{3, real(equations)}(f1, f2, f3) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - - # Get the density and gas gamma - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - # Get the velocities based on direction - if orientation == 1 - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - else # orientation == 2 - v_ll = rho_v2_ll / rho_ll - v_rr = rho_v2_rr / rho_rr - end - - # Compute the sound speeds on the left and right - p_ll = (gamma_ll - 1) * (rho_e_ll - 1/2 * (rho_v1_ll^2 + rho_v2_ll^2) / rho_ll) - c_ll = sqrt(gamma_ll * p_ll / rho_ll) - p_rr = (gamma_rr - 1) * (rho_e_rr - 1/2 * (rho_v1_rr^2 + rho_v2_rr^2) / rho_rr) - c_rr = sqrt(gamma_rr * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) -end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + + # Get the density and gas gamma + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + # Get the velocities based on direction + if orientation == 1 + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + else # orientation == 2 + v_ll = rho_v2_ll / rho_ll + v_rr = rho_v2_rr / rho_rr + end + # Compute the sound speeds on the left and right + p_ll = (gamma_ll - 1) * (rho_e_ll - 1 / 2 * (rho_v1_ll^2 + rho_v2_ll^2) / rho_ll) + c_ll = sqrt(gamma_ll * p_ll / rho_ll) + p_rr = (gamma_rr - 1) * (rho_e_rr - 1 / 2 * (rho_v1_rr^2 + rho_v2_rr^2) / rho_rr) + c_rr = sqrt(gamma_rr * p_rr / rho_rr) -@inline function max_abs_speeds(u, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) +end - rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho +@inline function max_abs_speeds(u, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1, rho_v2, rho_e = u - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 1/2 * rho * (v1^2 + v2^2)) - c = 
sqrt(gamma * p / rho) + rho = density(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho - return (abs(v1) + c, abs(v2) + c, ) -end + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 1 / 2 * rho * (v1^2 + v2^2)) + c = sqrt(gamma * p / rho) + return (abs(v1) + c, abs(v2) + c) +end # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u + rho_v1, rho_v2, rho_e = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+3] for i in eachcomponent(equations)) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 3] + for i in eachcomponent(equations)) - rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) - prim_other = SVector{3, real(equations)}(v1, v2, p) + rho = density(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) + prim_other = SVector{3, real(equations)}(v1, v2, p) - return vcat(prim_other, prim_rho) + return vcat(prim_other, prim_rho) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerMulticomponentEquations2D) - @unpack cv, gammas, gas_constants = equations - rho_v1, rho_v2, rho_e = u - - rho = density(u, equations) - - # Multicomponent stuff - help1 = zero(rho) - gas_constant = zero(rho) - for i in eachcomponent(equations) - help1 += u[i+3] * cv[i] - gas_constant += gas_constants[i] * (u[i+3]/rho) - end - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v_square = v1^2 + v2^2 - gamma = totalgamma(u, equations) - - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - log(gas_constant) - rho_p = rho / p - T = (rho_e - 0.5 * rho * v_square) / (help1) - entrop_rho = SVector{ncomponents(equations), real(equations)}( gas_constant * ((gamma - s)/(gamma - 1.0) - (0.5 * v_square * rho_p)) for i in eachcomponent(equations)) + @unpack cv, gammas, gas_constants = equations + rho_v1, rho_v2, rho_e = u - w1 = gas_constant * v1 * rho_p - w2 = gas_constant * v2 * rho_p - w3 = gas_constant * rho_p * (-1) + rho = density(u, equations) - entrop_other = SVector{3, real(equations)}(w1, w2, w3) + # Multicomponent stuff + help1 = zero(rho) + gas_constant = zero(rho) + for i in eachcomponent(equations) + help1 += u[i + 3] * cv[i] + gas_constant += gas_constants[i] * (u[i + 3] / rho) + end - return vcat(entrop_other, entrop_rho) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v_square = v1^2 + v2^2 + gamma = totalgamma(u, equations) + + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) - log(gas_constant) + rho_p = rho / p + T = (rho_e - 0.5 * rho * v_square) / (help1) + entrop_rho = SVector{ncomponents(equations), real(equations)}(gas_constant * + ((gamma - s) / + (gamma - 1.0) - + (0.5 * v_square * + rho_p)) + for i in eachcomponent(equations)) + + w1 = gas_constant * v1 * rho_p + w2 = gas_constant * v2 * rho_p + w3 = gas_constant * rho_p * (-1) + + entrop_other = SVector{3, real(equations)}(w1, w2, w3) + + return vcat(entrop_other, entrop_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerMulticomponentEquations2D) - @unpack cv, gammas = equations - v1, v2, p = prim + @unpack cv, gammas = equations + v1, v2, p = prim - cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i+3] for i 
in eachcomponent(equations)) - rho = density(prim, equations) - gamma = totalgamma(prim, equations) + cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i + 3] + for i in eachcomponent(equations)) + rho = density(prim, equations) + gamma = totalgamma(prim, equations) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_e = p/(gamma-1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2) + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2) - cons_other = SVector{3, real(equations)}(rho_v1, rho_v2, rho_e) + cons_other = SVector{3, real(equations)}(rho_v1, rho_v2, rho_e) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - """ totalgamma(u, equations::CompressibleEulerMulticomponentEquations2D) @@ -521,45 +574,42 @@ Function that calculates the total gamma out of all partial gammas using the partial density fractions as well as the partial specific heats at constant volume. """ @inline function totalgamma(u, equations::CompressibleEulerMulticomponentEquations2D) - @unpack cv, gammas = equations + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+3] * cv[i] * gammas[i] - help2 += u[i+3] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 3] * cv[i] * gammas[i] + help2 += u[i + 3] * cv[i] + end - return help1/help2 + return help1 / help2 end +@inline function density_pressure(u, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1, rho_v2, rho_e = u -@inline function density_pressure(u, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u + rho = density(u, equations) + gamma = totalgamma(u, equations) + rho_times_p = (gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) - rho = density(u, equations) - gamma = totalgamma(u, equations) - rho_times_p = (gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) - - return rho_times_p + return rho_times_p end - @inline function density(u, equations::CompressibleEulerMulticomponentEquations2D) - rho = zero(u[1]) + rho = zero(u[1]) - for i in eachcomponent(equations) - rho += u[i+3] - end - - return rho - end - - @inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations2D) - - return SVector{ncomponents(equations), real(equations)}(u[i+3]*v for i in eachcomponent(equations)) - end + for i in eachcomponent(equations) + rho += u[i + 3] + end + return rho +end +@inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations2D) + return SVector{ncomponents(equations), real(equations)}(u[i + 3] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/compressible_navier_stokes_2d.jl b/src/equations/compressible_navier_stokes_2d.jl index 895fd2f2ae3..33badba15d9 100644 --- a/src/equations/compressible_navier_stokes_2d.jl +++ b/src/equations/compressible_navier_stokes_2d.jl @@ -76,19 +76,21 @@ w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = -\frac{\rho}{p} #!!! warning "Experimental code" # This code is experimental and may be changed or removed in any future release. 
""" -struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real, E <: AbstractCompressibleEulerEquations{2}} <: AbstractCompressibleNavierStokesDiffusion{2, 4} - # TODO: parabolic - # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations - # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - mu::RealT # viscosity - Pr::RealT # Prandtl number - kappa::RealT # thermal diffusivity for Fick's law - - equations_hyperbolic::E # CompressibleEulerEquations2D - gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy +struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real, + E <: AbstractCompressibleEulerEquations{2}} <: + AbstractCompressibleNavierStokesDiffusion{2, 4} + # TODO: parabolic + # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations + # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + mu::RealT # viscosity + Pr::RealT # Prandtl number + kappa::RealT # thermal diffusivity for Fick's law + + equations_hyperbolic::E # CompressibleEulerEquations2D + gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy end """ @@ -113,18 +115,19 @@ struct GradientVariablesEntropy end function CompressibleNavierStokesDiffusion2D(equations::CompressibleEulerEquations2D; mu, Prandtl, gradient_variables = GradientVariablesPrimitive()) - gamma = equations.gamma - inv_gamma_minus_one = equations.inv_gamma_minus_one - μ, Pr = promote(mu, Prandtl) - - # Under the assumption of constant Prandtl number the thermal conductivity - # constant is kappa = gamma μ / ((gamma-1) Pr). - # Important note! Factor of μ is accounted for later in `flux`. - kappa = gamma * inv_gamma_minus_one / Pr - - CompressibleNavierStokesDiffusion2D{typeof(gradient_variables), typeof(gamma), typeof(equations)}(gamma, inv_gamma_minus_one, - μ, Pr, kappa, - equations, gradient_variables) + gamma = equations.gamma + inv_gamma_minus_one = equations.inv_gamma_minus_one + μ, Pr = promote(mu, Prandtl) + + # Under the assumption of constant Prandtl number the thermal conductivity + # constant is kappa = gamma μ / ((gamma-1) Pr). + # Important note! Factor of μ is accounted for later in `flux`. 
+ kappa = gamma * inv_gamma_minus_one / Pr + + CompressibleNavierStokesDiffusion2D{typeof(gradient_variables), typeof(gamma), + typeof(equations)}(gamma, inv_gamma_minus_one, + μ, Pr, kappa, + equations, gradient_variables) end # TODO: parabolic @@ -132,148 +135,169 @@ end # varnames(::typeof(cons2prim) , ::CompressibleNavierStokesDiffusion2D) = ("v1", "v2", "T") # varnames(::typeof(cons2entropy), ::CompressibleNavierStokesDiffusion2D) = ("w2", "w3", "w4") -varnames(variable_mapping, equations_parabolic::CompressibleNavierStokesDiffusion2D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, + equations_parabolic::CompressibleNavierStokesDiffusion2D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end # we specialize this function to compute gradients of primitive variables instead of # conservative variables. -gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) = cons2prim -gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) = cons2entropy - +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + cons2prim +end +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + cons2entropy +end # Explicit formulas for the diffusive Navier-Stokes fluxes are available, e.g., in Section 2 # of the paper by Rueda-Ramírez, Hennemann, Hindenlang, Winters, and Gassner # "An Entropy Stable Nodal Discontinuous Galerkin Method for the resistive # MHD Equations. Part II: Subcell Finite Volume Shock Capturing" # where one sets the magnetic field components equal to 0. -function flux(u, gradients, orientation::Integer, equations::CompressibleNavierStokesDiffusion2D) - # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. - rho, v1, v2, _ = convert_transformed_to_primitive(u, equations) - # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, T) - # either computed directly or reverse engineered from the gradient of the entropy variables - # by way of the `convert_gradient_variables` function. - _, dv1dx, dv2dx, dTdx = convert_derivative_to_primitive(u, gradients[1], equations) - _, dv1dy, dv2dy, dTdy = convert_derivative_to_primitive(u, gradients[2], equations) - - # Components of viscous stress tensor - - # (4/3 * (v1)_x - 2/3 * (v2)_y) - tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * dv2dy - # ((v1)_y + (v2)_x) - # stress tensor is symmetric - tau_12 = dv1dy + dv2dx # = tau_21 - # (4/3 * (v2)_y - 2/3 * (v1)_x) - tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * dv1dx - - # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) - # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) - # Note, the gas constant cancels under this formulation, so it is not present - # in the implementation - q1 = equations.kappa * dTdx - q2 = equations.kappa * dTdy - - # Constant dynamic viscosity is copied to a variable for readability. 
- # Offers flexibility for dynamic viscosity via Sutherland's law where it depends - # on temperature and reference values, Ts and Tref such that mu(T) - mu = equations.mu - - if orientation == 1 - # viscous flux components in the x-direction - f1 = zero(rho) - f2 = tau_11 * mu - f3 = tau_12 * mu - f4 = ( v1 * tau_11 + v2 * tau_12 + q1 ) * mu - - return SVector(f1, f2, f3, f4) - else # if orientation == 2 - # viscous flux components in the y-direction - # Note, symmetry is exploited for tau_12 = tau_21 - g1 = zero(rho) - g2 = tau_12 * mu # tau_21 * mu - g3 = tau_22 * mu - g4 = ( v1 * tau_12 + v2 * tau_22 + q2 ) * mu - - return SVector(g1, g2, g3, g4) - end +function flux(u, gradients, orientation::Integer, + equations::CompressibleNavierStokesDiffusion2D) + # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. + rho, v1, v2, _ = convert_transformed_to_primitive(u, equations) + # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, T) + # either computed directly or reverse engineered from the gradient of the entropy variables + # by way of the `convert_gradient_variables` function. + _, dv1dx, dv2dx, dTdx = convert_derivative_to_primitive(u, gradients[1], equations) + _, dv1dy, dv2dy, dTdy = convert_derivative_to_primitive(u, gradients[2], equations) + + # Components of viscous stress tensor + + # (4/3 * (v1)_x - 2/3 * (v2)_y) + tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * dv2dy + # ((v1)_y + (v2)_x) + # stress tensor is symmetric + tau_12 = dv1dy + dv2dx # = tau_21 + # (4/3 * (v2)_y - 2/3 * (v1)_x) + tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * dv1dx + + # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) + # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) + # Note, the gas constant cancels under this formulation, so it is not present + # in the implementation + q1 = equations.kappa * dTdx + q2 = equations.kappa * dTdy + + # Constant dynamic viscosity is copied to a variable for readability. + # Offers flexibility for dynamic viscosity via Sutherland's law where it depends + # on temperature and reference values, Ts and Tref such that mu(T) + mu = equations.mu + + if orientation == 1 + # viscous flux components in the x-direction + f1 = zero(rho) + f2 = tau_11 * mu + f3 = tau_12 * mu + f4 = (v1 * tau_11 + v2 * tau_12 + q1) * mu + + return SVector(f1, f2, f3, f4) + else # if orientation == 2 + # viscous flux components in the y-direction + # Note, symmetry is exploited for tau_12 = tau_21 + g1 = zero(rho) + g2 = tau_12 * mu # tau_21 * mu + g3 = tau_22 * mu + g4 = (v1 * tau_12 + v2 * tau_22 + q2) * mu + + return SVector(g1, g2, g3, g4) + end end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleNavierStokesDiffusion2D) - rho, rho_v1, rho_v2, _ = u + rho, rho_v1, rho_v2, _ = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - T = temperature(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + T = temperature(u, equations) - return SVector(rho, v1, v2, T) + return SVector(rho, v1, v2, T) end # Convert conservative variables to entropy # TODO: parabolic. We can improve efficiency by not computing w_1, which involves logarithms # This can be done by specializing `cons2entropy` and `entropy2cons` to `CompressibleNavierStokesDiffusion2D`, # but this may be confusing to new users. 
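For reference, the viscous flux assembled in the 2D `flux` above can be written compactly as follows. This is our transcription of the code, not part of the diff; the single factor of `mu` is applied once at the end, and the linear heat-conduction closure the comments call "Fick's law" is conventionally known as Fourier's law (Fick's law is the analogous relation for mass diffusion).

```math
\begin{aligned}
&\tau_{11} = \tfrac{4}{3} \partial_x v_1 - \tfrac{2}{3} \partial_y v_2, \qquad
\tau_{12} = \tau_{21} = \partial_y v_1 + \partial_x v_2, \qquad
\tau_{22} = \tfrac{4}{3} \partial_y v_2 - \tfrac{2}{3} \partial_x v_1, \\
&f_x^{\mathrm{visc}} = \mu \left( 0, \; \tau_{11}, \; \tau_{12}, \;
v_1 \tau_{11} + v_2 \tau_{12} + \kappa \, \partial_x T \right)^T .
\end{aligned}
```

The `orientation == 2` branch is identical with `x` and `y` exchanged, reusing the symmetric entry `tau_12 = tau_21`.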
-cons2entropy(u, equations::CompressibleNavierStokesDiffusion2D) = cons2entropy(u, equations.equations_hyperbolic) -entropy2cons(w, equations::CompressibleNavierStokesDiffusion2D) = entropy2cons(w, equations.equations_hyperbolic) +function cons2entropy(u, equations::CompressibleNavierStokesDiffusion2D) + cons2entropy(u, equations.equations_hyperbolic) +end +function entropy2cons(w, equations::CompressibleNavierStokesDiffusion2D) + entropy2cons(w, equations.equations_hyperbolic) +end # the `flux` function takes in transformed variables `u` which depend on the type of the gradient variables. # For CNS, it is simplest to formulate the viscous terms in primitive variables, so we transform the transformed # variables into primitive variables. -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - return u_transformed +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + return u_transformed end # TODO: parabolic. Make this more efficient! -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - # note: this uses CompressibleNavierStokesDiffusion2D versions of cons2prim and entropy2cons - return cons2prim(entropy2cons(u_transformed, equations), equations) +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + # note: this uses CompressibleNavierStokesDiffusion2D versions of cons2prim and entropy2cons + return cons2prim(entropy2cons(u_transformed, equations), equations) end - # Takes the solution values `u` and gradient of the entropy variables (w_2, w_3, w_4) and # reverse engineers the gradients to be terms of the primitive variables (v1, v2, T). # Helpful because then the diffusive fluxes have the same form as on paper. # Note, the first component of `gradient_entropy_vars` contains gradient(rho) which is unused. # TODO: parabolic; entropy stable viscous terms -@inline function convert_derivative_to_primitive(u, gradient, ::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - return gradient +@inline function convert_derivative_to_primitive(u, gradient, + ::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + return gradient end # the first argument is always the "transformed" variables. @inline function convert_derivative_to_primitive(w, gradient_entropy_vars, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - - # TODO: parabolic. This is inefficient to pass in transformed variables but then transform them back. - # We can fix this if we directly compute v1, v2, T from the entropy variables - u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion2D - rho, rho_v1, rho_v2, _ = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - T = temperature(u, equations) - - return SVector(gradient_entropy_vars[1], - T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[4]), # grad(u) = T*(grad(w_2)+v1*grad(w_4)) - T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[4]), # grad(v) = T*(grad(w_3)+v2*grad(w_4)) - T * T * gradient_entropy_vars[4] # grad(T) = T^2*grad(w_4)) - ) + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + + # TODO: parabolic. 
This is inefficient to pass in transformed variables but then transform them back. + # We can fix this if we directly compute v1, v2, T from the entropy variables + u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion2D + rho, rho_v1, rho_v2, _ = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + T = temperature(u, equations) + + return SVector(gradient_entropy_vars[1], + T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[4]), # grad(u) = T*(grad(w_2)+v1*grad(w_4)) + T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[4]), # grad(v) = T*(grad(w_3)+v2*grad(w_4)) + T * T * gradient_entropy_vars[4]) end - # This routine is required because `prim2cons` is called in `initial_condition`, which # is called with `equations::CompressibleEulerEquations2D`. This means it is inconsistent # with `cons2prim(..., ::CompressibleNavierStokesDiffusion2D)` as defined above. # TODO: parabolic. Is there a way to clean this up? -@inline prim2cons(u, equations::CompressibleNavierStokesDiffusion2D) = +@inline function prim2cons(u, equations::CompressibleNavierStokesDiffusion2D) prim2cons(u, equations.equations_hyperbolic) - +end @inline function temperature(u, equations::CompressibleNavierStokesDiffusion2D) - rho, rho_v1, rho_v2, rho_e = u + rho, rho_v1, rho_v2, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) - T = p / rho - return T + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) + T = p / rho + return T end # TODO: can we generalize this to MHD? @@ -289,8 +313,8 @@ to be boundary condition types such as the `NoSlip` velocity boundary condition This is an experimental feature and may change in future releases. """ struct BoundaryConditionNavierStokesWall{V, H} - boundary_condition_velocity::V - boundary_condition_heat_flux::H + boundary_condition_velocity::V + boundary_condition_heat_flux::H end """ @@ -302,7 +326,7 @@ and should return a `SVector{NDIMS}` whose entries are the velocity vector at a point `x` and time `t`. """ struct NoSlip{F} - boundary_value_function::F # value of the velocity vector on the boundary + boundary_value_function::F # value of the velocity vector on the boundary end """ @@ -314,7 +338,7 @@ The field `boundary_value_function` should be a function with signature temperature at point `x` and time `t`. """ struct Isothermal{F} - boundary_value_function::F # value of the temperature on the boundary + boundary_value_function::F # value of the temperature on the boundary end """ @@ -326,40 +350,69 @@ The field `boundary_value_normal_flux_function` should be a function with signat normal heat flux at point `x` and time `t`. 
""" struct Adiabatic{F} - boundary_value_normal_flux_function::F # scaled heat flux 1/T * kappa * dT/dn + boundary_value_normal_flux_function::F # scaled heat flux 1/T * kappa * dT/dn end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, u_inner[4]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + return SVector(u_inner[1], v1, v2, u_inner[4]) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - # rho, v1, v2, _ = u_inner - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + # rho, v1, v2, _ = u_inner + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) end - -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, T) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + return SVector(u_inner[1], v1, v2, T) end -@inline function 
(boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - return flux_inner +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + return flux_inner end # specialized BC impositions for GradientVariablesEntropy. @@ -370,38 +423,69 @@ end # Taken from "Entropy stable modal discontinuous Galerkin schemes and wall boundary conditions # for the compressible Navier-Stokes equations" by Chan, Lin, Warburton 2022. # DOI: 10.1016/j.jcp.2021.110723 -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - negative_rho_inv_p = w_inner[4] # w_4 = -rho / p - return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, negative_rho_inv_p) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + negative_rho_inv_p = w_inner[4] # w_4 = -rho / p + return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, + negative_rho_inv_p) end # this is actually identical to the specialization for GradientVariablesPrimitive, but included for completeness. 
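As a usage illustration for the wall types above, a minimal sketch (the variable names and anonymous functions are ours, not part of this diff; assumes `using Trixi`, which re-exports `SVector`):

```julia
# A stationary, adiabatic no-slip wall in 2D: zero velocity vector and
# zero scaled normal heat flux on the boundary.
velocity_bc = NoSlip((x, t, equations) -> SVector(0.0, 0.0))
heat_bc = Adiabatic((x, t, equations) -> 0.0)
boundary_condition_wall = BoundaryConditionNavierStokesWall(velocity_bc, heat_bc)
```

An isothermal wall is built the same way, with `Isothermal((x, t, equations) -> T_wall)` in place of `heat_bc` (`T_wall` being a prescribed temperature of one's choosing).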
-@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - - # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w4. Similarly for w3 - w4 = -1 / T - return SVector(w_inner[1], -v1 * w4, -v2 * w4, w4) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + + # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w4. 
Similarly for w3 + w4 = -1 / T + return SVector(w_inner[1], -v1 * w4, -v2 * w4, w4) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4]) end diff --git a/src/equations/compressible_navier_stokes_3d.jl b/src/equations/compressible_navier_stokes_3d.jl index 5a968261503..8930489295d 100644 --- a/src/equations/compressible_navier_stokes_3d.jl +++ b/src/equations/compressible_navier_stokes_3d.jl @@ -76,37 +76,40 @@ w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = \frac{\rho v_3}{p} #!!! warning "Experimental code" # This code is experimental and may be changed or removed in any future release. """ -struct CompressibleNavierStokesDiffusion3D{GradientVariables, RealT <: Real, E <: AbstractCompressibleEulerEquations{3}} <: AbstractCompressibleNavierStokesDiffusion{3, 5} - # TODO: parabolic - # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations - # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - mu::RealT # viscosity - Pr::RealT # Prandtl number - kappa::RealT # thermal diffusivity for Fick's law - - equations_hyperbolic::E # CompressibleEulerEquations3D - gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy +struct CompressibleNavierStokesDiffusion3D{GradientVariables, RealT <: Real, + E <: AbstractCompressibleEulerEquations{3}} <: + AbstractCompressibleNavierStokesDiffusion{3, 5} + # TODO: parabolic + # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations + # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + mu::RealT # viscosity + Pr::RealT # Prandtl number + kappa::RealT # thermal diffusivity for Fick's law + + equations_hyperbolic::E # CompressibleEulerEquations3D + gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy end # default to primitive gradient variables function CompressibleNavierStokesDiffusion3D(equations::CompressibleEulerEquations3D; mu, Prandtl, gradient_variables = GradientVariablesPrimitive()) - gamma = equations.gamma - inv_gamma_minus_one = equations.inv_gamma_minus_one - μ, Pr = promote(mu, Prandtl) - - # Under the assumption of constant Prandtl number the thermal conductivity - # constant is kappa = gamma μ / ((gamma-1) Pr). - # Important note! Factor of μ is accounted for later in `flux`. 
- kappa = gamma * inv_gamma_minus_one / Pr - - CompressibleNavierStokesDiffusion3D{typeof(gradient_variables), typeof(gamma), typeof(equations)}(gamma, inv_gamma_minus_one, - μ, Pr, kappa, - equations, gradient_variables) + gamma = equations.gamma + inv_gamma_minus_one = equations.inv_gamma_minus_one + μ, Pr = promote(mu, Prandtl) + + # Under the assumption of constant Prandtl number the thermal conductivity + # constant is kappa = gamma μ / ((gamma-1) Pr). + # Important note! Factor of μ is accounted for later in `flux`. + kappa = gamma * inv_gamma_minus_one / Pr + + CompressibleNavierStokesDiffusion3D{typeof(gradient_variables), typeof(gamma), + typeof(equations)}(gamma, inv_gamma_minus_one, + μ, Pr, kappa, + equations, gradient_variables) end # TODO: parabolic @@ -114,225 +117,279 @@ end # varnames(::typeof(cons2prim) , ::CompressibleNavierStokesDiffusion3D) = ("v1", "v2", "v3", "T") # varnames(::typeof(cons2entropy), ::CompressibleNavierStokesDiffusion3D) = ("w2", "w3", "w4", "w5") -varnames(variable_mapping, equations_parabolic::CompressibleNavierStokesDiffusion3D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, + equations_parabolic::CompressibleNavierStokesDiffusion3D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end # we specialize this function to compute gradients of primitive variables instead of # conservative variables. -gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) = cons2prim -gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) = cons2entropy - +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + cons2prim +end +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + cons2entropy +end # Explicit formulas for the diffusive Navier-Stokes fluxes are available, e.g., in Section 2 # of the paper by Rueda-Ramírez, Hennemann, Hindenlang, Winters, and Gassner # "An Entropy Stable Nodal Discontinuous Galerkin Method for the resistive # MHD Equations. Part II: Subcell Finite Volume Shock Capturing" # where one sets the magnetic field components equal to 0. -function flux(u, gradients, orientation::Integer, equations::CompressibleNavierStokesDiffusion3D) - # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. - rho, v1, v2, v3, _ = convert_transformed_to_primitive(u, equations) - # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, v3, T) - # either computed directly or reverse engineered from the gradient of the entropy variables - # by way of the `convert_gradient_variables` function. 
- _, dv1dx, dv2dx, dv3dx, dTdx = convert_derivative_to_primitive(u, gradients[1], equations) - _, dv1dy, dv2dy, dv3dy, dTdy = convert_derivative_to_primitive(u, gradients[2], equations) - _, dv1dz, dv2dz, dv3dz, dTdz = convert_derivative_to_primitive(u, gradients[3], equations) - - # Components of viscous stress tensor - - # Diagonal parts - # (4/3 * (v1)_x - 2/3 * ((v2)_y + (v3)_z) - tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * (dv2dy + dv3dz) - # (4/3 * (v2)_y - 2/3 * ((v1)_x + (v3)_z) - tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * (dv1dx + dv3dz) - # (4/3 * (v3)_z - 2/3 * ((v1)_x + (v2)_y) - tau_33 = 4.0 / 3.0 * dv3dz - 2.0 / 3.0 * (dv1dx + dv2dy) - - # Off diagonal parts, exploit that stress tensor is symmetric - # ((v1)_y + (v2)_x) - tau_12 = dv1dy + dv2dx # = tau_21 - # ((v1)_z + (v3)_x) - tau_13 = dv1dz + dv3dx # = tau_31 - # ((v2)_z + (v3)_y) - tau_23 = dv2dz + dv3dy # = tau_32 - - # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) - # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) - # Note, the gas constant cancels under this formulation, so it is not present - # in the implementation - q1 = equations.kappa * dTdx - q2 = equations.kappa * dTdy - q3 = equations.kappa * dTdz - - # Constant dynamic viscosity is copied to a variable for readability. - # Offers flexibility for dynamic viscosity via Sutherland's law where it depends - # on temperature and reference values, Ts and Tref such that mu(T) - mu = equations.mu - - if orientation == 1 - # viscous flux components in the x-direction - f1 = zero(rho) - f2 = tau_11 * mu - f3 = tau_12 * mu - f4 = tau_13 * mu - f5 = ( v1 * tau_11 + v2 * tau_12 + v3 * tau_13 + q1 ) * mu - - return SVector(f1, f2, f3, f4, f5) - elseif orientation == 2 - # viscous flux components in the y-direction - # Note, symmetry is exploited for tau_12 = tau_21 - g1 = zero(rho) - g2 = tau_12 * mu # tau_21 * mu - g3 = tau_22 * mu - g4 = tau_23 * mu - g5 = ( v1 * tau_12 + v2 * tau_22 + v3 * tau_23 + q2 ) * mu - - return SVector(g1, g2, g3, g4, g5) - else # if orientation == 3 - # viscous flux components in the z-direction - # Note, symmetry is exploited for tau_13 = tau_31, tau_23 = tau_32 - h1 = zero(rho) - h2 = tau_13 * mu # tau_31 * mu - h3 = tau_23 * mu # tau_32 * mu - h4 = tau_33 * mu - h5 = ( v1 * tau_13 + v2 * tau_23 + v3 * tau_33 + q3 ) * mu - - return SVector(h1, h2, h3, h4, h5) - end +function flux(u, gradients, orientation::Integer, + equations::CompressibleNavierStokesDiffusion3D) + # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. + rho, v1, v2, v3, _ = convert_transformed_to_primitive(u, equations) + # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, v3, T) + # either computed directly or reverse engineered from the gradient of the entropy variables + # by way of the `convert_gradient_variables` function. 
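+    # Note: the 4/3 and -2/3 coefficients in the stress tensor below realize the
+    # deviatoric (trace-free) stress under Stokes' hypothesis, lambda = -2/3 * mu.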
+ _, dv1dx, dv2dx, dv3dx, dTdx = convert_derivative_to_primitive(u, gradients[1], + equations) + _, dv1dy, dv2dy, dv3dy, dTdy = convert_derivative_to_primitive(u, gradients[2], + equations) + _, dv1dz, dv2dz, dv3dz, dTdz = convert_derivative_to_primitive(u, gradients[3], + equations) + + # Components of viscous stress tensor + + # Diagonal parts + # (4/3 * (v1)_x - 2/3 * ((v2)_y + (v3)_z) + tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * (dv2dy + dv3dz) + # (4/3 * (v2)_y - 2/3 * ((v1)_x + (v3)_z) + tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * (dv1dx + dv3dz) + # (4/3 * (v3)_z - 2/3 * ((v1)_x + (v2)_y) + tau_33 = 4.0 / 3.0 * dv3dz - 2.0 / 3.0 * (dv1dx + dv2dy) + + # Off diagonal parts, exploit that stress tensor is symmetric + # ((v1)_y + (v2)_x) + tau_12 = dv1dy + dv2dx # = tau_21 + # ((v1)_z + (v3)_x) + tau_13 = dv1dz + dv3dx # = tau_31 + # ((v2)_z + (v3)_y) + tau_23 = dv2dz + dv3dy # = tau_32 + + # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) + # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) + # Note, the gas constant cancels under this formulation, so it is not present + # in the implementation + q1 = equations.kappa * dTdx + q2 = equations.kappa * dTdy + q3 = equations.kappa * dTdz + + # Constant dynamic viscosity is copied to a variable for readability. + # Offers flexibility for dynamic viscosity via Sutherland's law where it depends + # on temperature and reference values, Ts and Tref such that mu(T) + mu = equations.mu + + if orientation == 1 + # viscous flux components in the x-direction + f1 = zero(rho) + f2 = tau_11 * mu + f3 = tau_12 * mu + f4 = tau_13 * mu + f5 = (v1 * tau_11 + v2 * tau_12 + v3 * tau_13 + q1) * mu + + return SVector(f1, f2, f3, f4, f5) + elseif orientation == 2 + # viscous flux components in the y-direction + # Note, symmetry is exploited for tau_12 = tau_21 + g1 = zero(rho) + g2 = tau_12 * mu # tau_21 * mu + g3 = tau_22 * mu + g4 = tau_23 * mu + g5 = (v1 * tau_12 + v2 * tau_22 + v3 * tau_23 + q2) * mu + + return SVector(g1, g2, g3, g4, g5) + else # if orientation == 3 + # viscous flux components in the z-direction + # Note, symmetry is exploited for tau_13 = tau_31, tau_23 = tau_32 + h1 = zero(rho) + h2 = tau_13 * mu # tau_31 * mu + h3 = tau_23 * mu # tau_32 * mu + h4 = tau_33 * mu + h5 = (v1 * tau_13 + v2 * tau_23 + v3 * tau_33 + q3) * mu + + return SVector(h1, h2, h3, h4, h5) + end end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleNavierStokesDiffusion3D) - rho, rho_v1, rho_v2, rho_v3, _ = u + rho, rho_v1, rho_v2, rho_v3, _ = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - T = temperature(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + T = temperature(u, equations) - return SVector(rho, v1, v2, v3, T) + return SVector(rho, v1, v2, v3, T) end # Convert conservative variables to entropy # TODO: parabolic. We can improve efficiency by not computing w_1, which involves logarithms # This can be done by specializing `cons2entropy` and `entropy2cons` to `CompressibleNavierStokesDiffusion2D`, # but this may be confusing to new users. 
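For context on the TODO above about avoiding `w_1`: in the standard entropy variables for the compressible Euler equations (our restatement, consistent with the `w_2, ..., w_5` quoted in this file's docstring),

```math
w_1 = \frac{\gamma - s}{\gamma - 1} - \frac{\rho \, (v_1^2 + v_2^2 + v_3^2)}{2 p},
\qquad s = \log p - \gamma \log \rho ,
```

only `w_1` involves logarithms; `w_2` through `w_5` are rational in the conservative variables, so a specialized `cons2entropy` could skip the log evaluations entirely.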
-cons2entropy(u, equations::CompressibleNavierStokesDiffusion3D) = cons2entropy(u, equations.equations_hyperbolic) -entropy2cons(w, equations::CompressibleNavierStokesDiffusion3D) = entropy2cons(w, equations.equations_hyperbolic) +function cons2entropy(u, equations::CompressibleNavierStokesDiffusion3D) + cons2entropy(u, equations.equations_hyperbolic) +end +function entropy2cons(w, equations::CompressibleNavierStokesDiffusion3D) + entropy2cons(w, equations.equations_hyperbolic) +end # the `flux` function takes in transformed variables `u` which depend on the type of the gradient variables. # For CNS, it is simplest to formulate the viscous terms in primitive variables, so we transform the transformed # variables into primitive variables. -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - return u_transformed +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + return u_transformed end # TODO: parabolic. Make this more efficient! -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - # note: this uses CompressibleNavierStokesDiffusion3D versions of cons2prim and entropy2cons - return cons2prim(entropy2cons(u_transformed, equations), equations) +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + # note: this uses CompressibleNavierStokesDiffusion3D versions of cons2prim and entropy2cons + return cons2prim(entropy2cons(u_transformed, equations), equations) end - # Takes the solution values `u` and gradient of the entropy variables (w_2, w_3, w_4, w_5) and # reverse engineers the gradients to be terms of the primitive variables (v1, v2, v3, T). # Helpful because then the diffusive fluxes have the same form as on paper. # Note, the first component of `gradient_entropy_vars` contains gradient(rho) which is unused. # TODO: parabolic; entropy stable viscous terms -@inline function convert_derivative_to_primitive(u, gradient, ::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - return gradient +@inline function convert_derivative_to_primitive(u, gradient, + ::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + return gradient end # the first argument is always the "transformed" variables. @inline function convert_derivative_to_primitive(w, gradient_entropy_vars, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - - # TODO: parabolic. This is inefficient to pass in transformed variables but then transform them back. 
- # We can fix this if we directly compute v1, v2, v3, T from the entropy variables - u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion3D - rho, rho_v1, rho_v2, rho_v3, _ = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - T = temperature(u, equations) - - return SVector(gradient_entropy_vars[1], - T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[5]), # grad(u) = T*(grad(w_2)+v1*grad(w_5)) - T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_3)+v2*grad(w_5)) - T * (gradient_entropy_vars[4] + v3 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_4)+v3*grad(w_5)) - T * T * gradient_entropy_vars[5] # grad(T) = T^2*grad(w_5)) - ) + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + + # TODO: parabolic. This is inefficient to pass in transformed variables but then transform them back. + # We can fix this if we directly compute v1, v2, v3, T from the entropy variables + u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion3D + rho, rho_v1, rho_v2, rho_v3, _ = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + T = temperature(u, equations) + + return SVector(gradient_entropy_vars[1], + T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[5]), # grad(u) = T*(grad(w_2)+v1*grad(w_5)) + T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_3)+v2*grad(w_5)) + T * (gradient_entropy_vars[4] + v3 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_4)+v3*grad(w_5)) + T * T * gradient_entropy_vars[5]) end - # This routine is required because `prim2cons` is called in `initial_condition`, which # is called with `equations::CompressibleEulerEquations3D`. This means it is inconsistent # with `cons2prim(..., ::CompressibleNavierStokesDiffusion3D)` as defined above. # TODO: parabolic. Is there a way to clean this up? -@inline prim2cons(u, equations::CompressibleNavierStokesDiffusion3D) = +@inline function prim2cons(u, equations::CompressibleNavierStokesDiffusion3D) prim2cons(u, equations.equations_hyperbolic) - +end @inline function temperature(u, equations::CompressibleNavierStokesDiffusion3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u + rho, rho_v1, rho_v2, rho_v3, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) - T = p / rho - return T + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) + T = p / rho + return T end - @inline function enstrophy(u, gradients, equations::CompressibleNavierStokesDiffusion3D) - # Enstrophy is 0.5 rho ω⋅ω where ω = ∇ × v + # Enstrophy is 0.5 rho ω⋅ω where ω = ∇ × v - omega = vorticity(u, gradients, equations) - return 0.5 * u[1] * (omega[1]^2 + omega[2]^2 + omega[3]^2) + omega = vorticity(u, gradients, equations) + return 0.5 * u[1] * (omega[1]^2 + omega[2]^2 + omega[3]^2) end - @inline function vorticity(u, gradients, equations::CompressibleNavierStokesDiffusion3D) - # Ensure that we have velocity `gradients` by way of the `convert_gradient_variables` function. - _, dv1dx, dv2dx, dv3dx, _ = convert_derivative_to_primitive(u, gradients[1], equations) - _, dv1dy, dv2dy, dv3dy, _ = convert_derivative_to_primitive(u, gradients[2], equations) - _, dv1dz, dv2dz, dv3dz, _ = convert_derivative_to_primitive(u, gradients[3], equations) + # Ensure that we have velocity `gradients` by way of the `convert_gradient_variables` function. 
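+    # omega = curl(v) = (dv3/dy - dv2/dz, dv1/dz - dv3/dx, dv2/dx - dv1/dy),
+    # assembled from the direction-wise primitive-variable gradients extracted below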
+ _, dv1dx, dv2dx, dv3dx, _ = convert_derivative_to_primitive(u, gradients[1], equations) + _, dv1dy, dv2dy, dv3dy, _ = convert_derivative_to_primitive(u, gradients[2], equations) + _, dv1dz, dv2dz, dv3dz, _ = convert_derivative_to_primitive(u, gradients[3], equations) - return SVector(dv3dy - dv2dz , dv1dz - dv3dx , dv2dx - dv1dy) + return SVector(dv3dy - dv2dz, dv1dz - dv3dx, dv2dx - dv1dy) end - -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, v3, u_inner[5]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + return SVector(u_inner[1], v1, v2, v3, u_inner[5]) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - # rho, v1, v2, v3, _ = u_inner - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + # rho, v1, v2, v3, _ = u_inner + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], + normal_energy_flux) end - -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, v3, T) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + 
equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + return SVector(u_inner[1], v1, v2, v3, T) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - return flux_inner +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + return flux_inner end # specialized BC impositions for GradientVariablesEntropy. @@ -343,38 +400,74 @@ end # Taken from "Entropy stable modal discontinuous Galerkin schemes and wall boundary conditions # for the compressible Navier-Stokes equations" by Chan, Lin, Warburton 2022. # DOI: 10.1016/j.jcp.2021.110723 -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - negative_rho_inv_p = w_inner[5] # w_5 = -rho / p - return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, -v3 * negative_rho_inv_p, negative_rho_inv_p) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + negative_rho_inv_p = w_inner[5] # w_5 = -rho / p + return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, + -v3 * negative_rho_inv_p, negative_rho_inv_p) end # this is actually identical to the specialization for GradientVariablesPrimitive, but included for completeness. 
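The `Gradient`-type impositions above and below rest on a small identity relating the entropy variables to the primitive ones (restating the in-code comments):

```math
w_5 = -\frac{\rho}{p} = -\frac{1}{T},
\qquad
w_{k+1} = \frac{\rho v_k}{p} = \frac{v_k}{T} = -v_k \, w_5, \quad k = 1, 2, 3,
```

so a prescribed wall velocity (and, for `Isothermal`, a wall temperature) translates directly into boundary values for `w_2, w_3, w_4` and `w_5`.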
-@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], + normal_energy_flux) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - - # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w5. Similarly for w3 and w4 - w5 = -1 / T - return SVector(w_inner[1], -v1 * w5, -v2 * w5, -v3 * w5, w5) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + + # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w5. 
Similarly for w3 and w4 + w5 = -1 / T + return SVector(w_inner[1], -v1 * w5, -v2 * w5, -v3 * w5, w5) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], flux_inner[5]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], + flux_inner[5]) end diff --git a/src/equations/equations.jl b/src/equations/equations.jl index 6640ee7cfc7..90b2cd62191 100644 --- a/src/equations/equations.jl +++ b/src/equations/equations.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Retrieve number of variables from equation instance @inline nvariables(::AbstractEquations{NDIMS, NVARS}) where {NDIMS, NVARS} = NVARS @@ -42,45 +42,42 @@ Common choices of the `conversion_function` are [`cons2cons`](@ref) and """ function varnames end - # Add methods to show some information on systems of equations. function Base.show(io::IO, equations::AbstractEquations) - # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. - @nospecialize equations # reduce precompilation time - - print(io, get_name(equations), " with ") - if nvariables(equations) == 1 - print(io, "one variable") - else - print(io, nvariables(equations), " variables") - end + # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. + @nospecialize equations # reduce precompilation time + + print(io, get_name(equations), " with ") + if nvariables(equations) == 1 + print(io, "one variable") + else + print(io, nvariables(equations), " variables") + end end function Base.show(io::IO, ::MIME"text/plain", equations::AbstractEquations) - # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. - @nospecialize equations # reduce precompilation time - - if get(io, :compact, false) - show(io, equations) - else - summary_header(io, get_name(equations)) - summary_line(io, "#variables", nvariables(equations)) - for variable in eachvariable(equations) - summary_line(increment_indent(io), - "variable " * string(variable), - varnames(cons2cons, equations)[variable]) + # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. 
+ @nospecialize equations # reduce precompilation time + + if get(io, :compact, false) + show(io, equations) + else + summary_header(io, get_name(equations)) + summary_line(io, "#variables", nvariables(equations)) + for variable in eachvariable(equations) + summary_line(increment_indent(io), + "variable " * string(variable), + varnames(cons2cons, equations)[variable]) + end + summary_footer(io) end - summary_footer(io) - end end - -@inline Base.ndims(::AbstractEquations{NDIMS}) where NDIMS = NDIMS +@inline Base.ndims(::AbstractEquations{NDIMS}) where {NDIMS} = NDIMS # equations act like scalars in broadcasting Base.broadcastable(equations::AbstractEquations) = Ref(equations) - """ flux(u, orientation_or_normal, equations) @@ -97,13 +94,13 @@ function flux end Enables calling `flux` with a non-integer argument `normal_direction` for one-dimensional equations. Returns the value of `flux(u, 1, equations)` scaled by `normal_direction[1]`. """ -@inline function flux(u, normal_direction::AbstractVector, equations::AbstractEquations{1}) - # Call `flux` with `orientation::Int = 1` for dispatch. Note that the actual - # `orientation` argument is ignored. - return normal_direction[1] * flux(u, 1, equations) +@inline function flux(u, normal_direction::AbstractVector, + equations::AbstractEquations{1}) + # Call `flux` with `orientation::Int = 1` for dispatch. Note that the actual + # `orientation` argument is ignored. + return normal_direction[1] * flux(u, 1, equations) end - """ rotate_to_x(u, normal, equations) @@ -126,7 +123,6 @@ See also: [`rotate_to_x`](@ref) """ function rotate_from_x end - """ BoundaryConditionDirichlet(boundary_value_function) @@ -146,24 +142,28 @@ julia> BoundaryConditionDirichlet(initial_condition_convergence_test) ``` """ struct BoundaryConditionDirichlet{B} - boundary_value_function::B + boundary_value_function::B end # Dirichlet-type boundary condition for use with TreeMesh or StructuredMesh -@inline function (boundary_condition::BoundaryConditionDirichlet)(u_inner, orientation_or_normal, +@inline function (boundary_condition::BoundaryConditionDirichlet)(u_inner, + orientation_or_normal, direction, x, t, - surface_flux_function, equations) - u_boundary = boundary_condition.boundary_value_function(x, t, equations) - - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, equations) - end + surface_flux_function, + equations) + u_boundary = boundary_condition.boundary_value_function(x, t, equations) + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, + equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, + equations) + end - return flux + return flux end # Dirichlet-type boundary condition for use with UnstructuredMesh2D @@ -173,13 +173,13 @@ end x, t, surface_flux_function, equations) - # get the external value of the solution - u_boundary = boundary_condition.boundary_value_function(x, t, equations) + # get the external value of the solution + u_boundary = boundary_condition.boundary_value_function(x, t, equations) - # Calculate 
boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + # Calculate boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + return flux end # operator types used for dispatch on parabolic boundary fluxes @@ -199,7 +199,7 @@ boundary_normal_flux_function(x, t, equations) where `x` specifies the coordinates, `t` is the current time, and `equation` is the corresponding system of equations. """ struct BoundaryConditionNeumann{B} - boundary_normal_flux_function::B + boundary_normal_flux_function::B end # set sensible default values that may be overwritten by specific equations @@ -216,14 +216,13 @@ The return value will be `True()` or `False()` to allow dispatching on the retur have_nonconservative_terms(::AbstractEquations) = False() have_constant_speed(::AbstractEquations) = False() -default_analysis_errors(::AbstractEquations) = (:l2_error, :linf_error) +default_analysis_errors(::AbstractEquations) = (:l2_error, :linf_error) """ default_analysis_integrals(equations) Default analysis integrals used by the [`AnalysisCallback`](@ref). """ -default_analysis_integrals(::AbstractEquations) = (entropy_timederivative,) - +default_analysis_integrals(::AbstractEquations) = (entropy_timederivative,) """ cons2cons(u, equations) @@ -333,35 +332,48 @@ function energy_internal end include("numerical_fluxes.jl") # Linear scalar advection -abstract type AbstractLinearScalarAdvectionEquation{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractLinearScalarAdvectionEquation{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("linear_scalar_advection_1d.jl") include("linear_scalar_advection_2d.jl") include("linear_scalar_advection_3d.jl") # Inviscid Burgers -abstract type AbstractInviscidBurgersEquation{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractInviscidBurgersEquation{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("inviscid_burgers_1d.jl") # Shallow water equations -abstract type AbstractShallowWaterEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractShallowWaterEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("shallow_water_1d.jl") include("shallow_water_2d.jl") include("shallow_water_two_layer_1d.jl") include("shallow_water_two_layer_2d.jl") # CompressibleEulerEquations -abstract type AbstractCompressibleEulerEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractCompressibleEulerEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("compressible_euler_1d.jl") include("compressible_euler_2d.jl") include("compressible_euler_3d.jl") # CompressibleEulerMulticomponentEquations -abstract type AbstractCompressibleEulerMulticomponentEquations{NDIMS, NVARS, NCOMP} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractCompressibleEulerMulticomponentEquations{NDIMS, NVARS, NCOMP} <: + AbstractEquations{NDIMS, NVARS} end include("compressible_euler_multicomponent_1d.jl") include("compressible_euler_multicomponent_2d.jl") # Retrieve number of components from equation instance for the multicomponent case -@inline ncomponents(::AbstractCompressibleEulerMulticomponentEquations{NDIMS, NVARS, NCOMP}) where {NDIMS, NVARS, NCOMP} = NCOMP +@inline function ncomponents(::AbstractCompressibleEulerMulticomponentEquations{NDIMS, + NVARS, + NCOMP}) where { + NDIMS, + NVARS, + NCOMP + } + NCOMP +end """ 
    eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations)
@@ -369,21 +381,32 @@ Return an iterator over the indices that specify the location in relevant data s
for the components in `AbstractCompressibleEulerMulticomponentEquations`.
In particular, the components themselves are not returned.
"""
-@inline eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations) = Base.OneTo(ncomponents(equations))
+@inline function eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations)
+    Base.OneTo(ncomponents(equations))
+end

# Ideal MHD
-abstract type AbstractIdealGlmMhdEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end
+abstract type AbstractIdealGlmMhdEquations{NDIMS, NVARS} <:
+              AbstractEquations{NDIMS, NVARS} end
include("ideal_glm_mhd_1d.jl")
include("ideal_glm_mhd_2d.jl")
include("ideal_glm_mhd_3d.jl")

# IdealGlmMhdMulticomponentEquations
-abstract type AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS, NCOMP} <: AbstractEquations{NDIMS, NVARS} end
+abstract type AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS, NCOMP} <:
+              AbstractEquations{NDIMS, NVARS} end
include("ideal_glm_mhd_multicomponent_1d.jl")
include("ideal_glm_mhd_multicomponent_2d.jl")

# Retrieve number of components from equation instance for the multicomponent case
-@inline ncomponents(::AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS, NCOMP}) where {NDIMS, NVARS, NCOMP} = NCOMP
+@inline function ncomponents(::AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS,
+                                                                          NCOMP}) where {
+                                                                                         NDIMS,
+                                                                                         NVARS,
+                                                                                         NCOMP
+                                                                                         }
+    NCOMP
+end

"""
    eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations)
@@ -391,27 +414,33 @@ Return an iterator over the indices that specify the location in relevant data s
for the components in `AbstractIdealGlmMhdMulticomponentEquations`.
In particular, the components themselves are not returned.
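
For example, a minimal sketch (assuming `equations` is an
`IdealGlmMhdMulticomponentEquations1D` instance with two components; the names and
values here are for illustration only):

```julia
# `eachcomponent` yields `Base.OneTo(ncomponents(equations))`,
# i.e. the component indices 1 and 2 in this example
component_indices = collect(eachcomponent(equations))
```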
""" -@inline eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations) = Base.OneTo(ncomponents(equations)) +@inline function eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations) + Base.OneTo(ncomponents(equations)) +end # Diffusion equation: first order hyperbolic system -abstract type AbstractHyperbolicDiffusionEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractHyperbolicDiffusionEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("hyperbolic_diffusion_1d.jl") include("hyperbolic_diffusion_2d.jl") include("hyperbolic_diffusion_3d.jl") # Lattice-Boltzmann equation (advection part only) -abstract type AbstractLatticeBoltzmannEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractLatticeBoltzmannEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("lattice_boltzmann_2d.jl") include("lattice_boltzmann_3d.jl") # Acoustic perturbation equations -abstract type AbstractAcousticPerturbationEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractAcousticPerturbationEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("acoustic_perturbation_2d.jl") # Linearized Euler equations -abstract type AbstractLinearizedEulerEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractLinearizedEulerEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("linearized_euler_2d.jl") -abstract type AbstractEquationsParabolic{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end - +abstract type AbstractEquationsParabolic{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end end # @muladd diff --git a/src/equations/equations_parabolic.jl b/src/equations/equations_parabolic.jl index 76c6eedc33c..6c0be43798a 100644 --- a/src/equations/equations_parabolic.jl +++ b/src/equations/equations_parabolic.jl @@ -3,11 +3,13 @@ gradient_variable_transformation(::AbstractEquationsParabolic) = cons2cons # Linear scalar diffusion for use in linear scalar advection-diffusion problems -abstract type AbstractLaplaceDiffusion{NDIMS, NVARS} <: AbstractEquationsParabolic{NDIMS, NVARS} end +abstract type AbstractLaplaceDiffusion{NDIMS, NVARS} <: + AbstractEquationsParabolic{NDIMS, NVARS} end include("laplace_diffusion_1d.jl") include("laplace_diffusion_2d.jl") # Compressible Navier-Stokes equations -abstract type AbstractCompressibleNavierStokesDiffusion{NDIMS, NVARS} <: AbstractEquationsParabolic{NDIMS, NVARS} end +abstract type AbstractCompressibleNavierStokesDiffusion{NDIMS, NVARS} <: + AbstractEquationsParabolic{NDIMS, NVARS} end include("compressible_navier_stokes_2d.jl") include("compressible_navier_stokes_3d.jl") diff --git a/src/equations/hyperbolic_diffusion_1d.jl b/src/equations/hyperbolic_diffusion_1d.jl index 0e599417551..39a555e7c72 100644 --- a/src/equations/hyperbolic_diffusion_1d.jl +++ b/src/equations/hyperbolic_diffusion_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent @doc raw""" HyperbolicDiffusionEquations1D @@ -19,24 +19,26 @@ Further analysis can be found in the paper schemes [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ -struct HyperbolicDiffusionEquations1D{RealT<:Real} <: AbstractHyperbolicDiffusionEquations{1, 2} - Lr::RealT # reference length scale - inv_Tr::RealT # inverse of the reference time scale - nu::RealT # diffusion constant +struct HyperbolicDiffusionEquations1D{RealT <: Real} <: + AbstractHyperbolicDiffusionEquations{1, 2} + Lr::RealT # reference length scale + inv_Tr::RealT # inverse of the reference time scale + nu::RealT # diffusion constant end -function HyperbolicDiffusionEquations1D(; nu=1.0, Lr=inv(2pi)) - Tr = Lr^2 / nu - HyperbolicDiffusionEquations1D(promote(Lr, inv(Tr), nu)...) +function HyperbolicDiffusionEquations1D(; nu = 1.0, Lr = inv(2pi)) + Tr = Lr^2 / nu + HyperbolicDiffusionEquations1D(promote(Lr, inv(Tr), nu)...) end - varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations1D) = ("phi", "q1") varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations1D) = ("phi", "q1") -default_analysis_errors(::HyperbolicDiffusionEquations1D) = (:l2_error, :linf_error, :residual) +function default_analysis_errors(::HyperbolicDiffusionEquations1D) + (:l2_error, :linf_error, :residual) +end @inline function residual_steady_state(du, ::HyperbolicDiffusionEquations1D) - abs(du[1]) + abs(du[1]) end """ @@ -47,18 +49,19 @@ A non-priodic smooth initial condition. Can be used for convergence tests in com !!! note The solution is periodic but the initial guess is not. """ -function initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations1D) - # elliptic equation: -νΔϕ = f - # Taken from Section 6.1 of Nishikawa https://doi.org/10.1016/j.jcp.2007.07.029 - if t == 0.0 - # initial "guess" of the solution and its derivative - phi = x[1]^2 - x[1] - q1 = 2*x[1] - 1 - else - phi = sinpi(x[1]) # ϕ - q1 = pi * cospi(x[1]) # ϕ_x - end - return SVector(phi, q1) +function initial_condition_poisson_nonperiodic(x, t, + equations::HyperbolicDiffusionEquations1D) + # elliptic equation: -νΔϕ = f + # Taken from Section 6.1 of Nishikawa https://doi.org/10.1016/j.jcp.2007.07.029 + if t == 0.0 + # initial "guess" of the solution and its derivative + phi = x[1]^2 - x[1] + q1 = 2 * x[1] - 1 + else + phi = sinpi(x[1]) # ϕ + q1 = pi * cospi(x[1]) # ϕ_x + end + return SVector(phi, q1) end """ @@ -71,14 +74,14 @@ diffusion system that is used with [`initial_condition_poisson_nonperiodic`](@re """ @inline function source_terms_poisson_nonperiodic(u, x, t, equations::HyperbolicDiffusionEquations1D) - # elliptic equation: -νΔϕ = f - # analytical solution: ϕ = sin(πx) and f = π^2sin(πx) - @unpack inv_Tr = equations + # elliptic equation: -νΔϕ = f + # analytical solution: ϕ = sin(πx) and f = π^2sin(πx) + @unpack inv_Tr = equations - dphi = pi^2 * sinpi(x[1]) - dq1 = -inv_Tr * u[2] + dphi = pi^2 * sinpi(x[1]) + dq1 = -inv_Tr * u[2] - return SVector(dphi, dq1) + return SVector(dphi, dq1) end """ @@ -92,37 +95,36 @@ Boundary conditions used for convergence tests in combination with function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, surface_flux_function, equations::HyperbolicDiffusionEquations1D) - # elliptic equation: -νΔϕ = f - phi = sinpi(x[1]) # ϕ - q1 = pi * cospi(x[1]) # ϕ_x - u_boundary = SVector(phi, q1) - - # Calculate boundary flux - if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = 
surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end - - return flux -end + # elliptic equation: -νΔϕ = f + phi = sinpi(x[1]) # ϕ + q1 = pi * cospi(x[1]) # ϕ_x + u_boundary = SVector(phi, q1) + + # Calculate boundary flux + if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + return flux +end """ source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations1D) Source term that only includes the forcing from the hyperbolic diffusion system. """ -@inline function source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations1D) - # harmonic solution of the form ϕ = A + B * x, so f = 0 - @unpack inv_Tr = equations +@inline function source_terms_harmonic(u, x, t, + equations::HyperbolicDiffusionEquations1D) + # harmonic solution of the form ϕ = A + B * x, so f = 0 + @unpack inv_Tr = equations - dq1 = -inv_Tr * u[2] + dq1 = -inv_Tr * u[2] - return SVector(zero(dq1), dq1) + return SVector(zero(dq1), dq1) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations1D) @@ -132,71 +134,68 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`source_terms_harmonic`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations1D) - - # Determine phi_x - G = 1.0 # gravitational constant - C = -4.0 * G / pi # -4 * G / ndims * pi - A = 0.1 # perturbation coefficient must match Euler setup - rho1 = A * sinpi(x[1] - t) - # initialize with ansatz of gravity potential - phi = C * rho1 - q1 = C * A * pi * cospi(x[1] - t) # = gravity acceleration in x-direction - - return SVector(phi, q1) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::HyperbolicDiffusionEquations1D) + + # Determine phi_x + G = 1.0 # gravitational constant + C = -4.0 * G / pi # -4 * G / ndims * pi + A = 0.1 # perturbation coefficient must match Euler setup + rho1 = A * sinpi(x[1] - t) + # initialize with ansatz of gravity potential + phi = C * rho1 + q1 = C * A * pi * cospi(x[1] - t) # = gravity acceleration in x-direction + + return SVector(phi, q1) end - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::HyperbolicDiffusionEquations1D) - phi, q1 = u - @unpack inv_Tr = equations +@inline function flux(u, orientation::Integer, + equations::HyperbolicDiffusionEquations1D) + phi, q1 = u + @unpack inv_Tr = equations - # Ignore orientation since it is always "1" in 1D - f1 = -equations.nu * q1 - f2 = -phi * inv_Tr + # Ignore orientation since it is always "1" in 1D + f1 = -equations.nu * q1 + f2 = -phi * inv_Tr - return SVector(f1, f2) + return SVector(f1, f2) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations1D) - λ_max = sqrt(equations.nu * equations.inv_Tr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations1D) + λ_max = 
sqrt(equations.nu * equations.inv_Tr) end - @inline have_constant_speed(::HyperbolicDiffusionEquations1D) = True() @inline function max_abs_speeds(eq::HyperbolicDiffusionEquations1D) - return sqrt(eq.nu * eq.inv_Tr) + return sqrt(eq.nu * eq.inv_Tr) end - # Convert conservative variables to primitive @inline cons2prim(u, equations::HyperbolicDiffusionEquations1D) = u # Convert conservative variables to entropy found in I Do Like CFD, Too, Vol. 1 @inline function cons2entropy(u, equations::HyperbolicDiffusionEquations1D) - phi, q1 = u + phi, q1 = u - w1 = phi - w2 = equations.Lr^2 * q1 + w1 = phi + w2 = equations.Lr^2 * q1 - return SVector(w1, w2) + return SVector(w1, w2) end - # Calculate entropy for a conservative state `u` (here: same as total energy) -@inline entropy(u, equations::HyperbolicDiffusionEquations1D) = energy_total(u, equations) - +@inline function entropy(u, equations::HyperbolicDiffusionEquations1D) + energy_total(u, equations) +end # Calculate total energy for a conservative state `u` @inline function energy_total(u, equations::HyperbolicDiffusionEquations1D) - # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" - phi, q1 = u - return 0.5 * (phi^2 + equations.Lr^2 * q1^2) + # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" + phi, q1 = u + return 0.5 * (phi^2 + equations.Lr^2 * q1^2) end - - end # @muladd diff --git a/src/equations/hyperbolic_diffusion_2d.jl b/src/equations/hyperbolic_diffusion_2d.jl index 0f24949faad..25536a060f8 100644 --- a/src/equations/hyperbolic_diffusion_2d.jl +++ b/src/equations/hyperbolic_diffusion_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" HyperbolicDiffusionEquations2D @@ -13,92 +13,95 @@ A description of this system can be found in Sec. 2.5 of the book "I Do Like CFD The book is freely available at http://www.cfdbooks.com/ and further analysis can be found in the paper by Nishikawa [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ -struct HyperbolicDiffusionEquations2D{RealT<:Real} <: AbstractHyperbolicDiffusionEquations{2, 3} - Lr::RealT # reference length scale - inv_Tr::RealT # inverse of the reference time scale - nu::RealT # diffusion constant +struct HyperbolicDiffusionEquations2D{RealT <: Real} <: + AbstractHyperbolicDiffusionEquations{2, 3} + Lr::RealT # reference length scale + inv_Tr::RealT # inverse of the reference time scale + nu::RealT # diffusion constant end -function HyperbolicDiffusionEquations2D(; nu=1.0, Lr=inv(2pi)) - Tr = Lr^2 / nu - HyperbolicDiffusionEquations2D(promote(Lr, inv(Tr), nu)...) +function HyperbolicDiffusionEquations2D(; nu = 1.0, Lr = inv(2pi)) + Tr = Lr^2 / nu + HyperbolicDiffusionEquations2D(promote(Lr, inv(Tr), nu)...) 
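+    # (`promote` stores `Lr`, `inv(Tr)`, and `nu` with a common floating-point type;
+    # keeping the inverse of the reference time scale `Tr = Lr^2 / nu` lets hot loops
+    # multiply by `inv_Tr` instead of performing a slower division)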
end - varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations2D) = ("phi", "q1", "q2") varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations2D) = ("phi", "q1", "q2") -default_analysis_errors(::HyperbolicDiffusionEquations2D) = (:l2_error, :linf_error, :residual) +function default_analysis_errors(::HyperbolicDiffusionEquations2D) + (:l2_error, :linf_error, :residual) +end @inline function residual_steady_state(du, ::HyperbolicDiffusionEquations2D) - abs(du[1]) + abs(du[1]) end - # Set initial conditions at physical location `x` for pseudo-time `t` -@inline function initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations2D) - # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω - if iszero(t) - T = eltype(x) - phi = one(T) - q1 = one(T) - q2 = one(T) - else - sinpi_x1, cospi_x1 = sincos(pi*x[1]) - sinpi_2x2, cospi_2x2 = sincos(pi*2*x[2]) - phi = 2 * cospi_x1 * sinpi_2x2 + 2 # ϕ - q1 = -2 * pi * sinpi_x1 * sinpi_2x2 # ϕ_x - q2 = 4 * pi * cospi_x1 * cospi_2x2 # ϕ_y - end - return SVector(phi, q1, q2) -end - -@inline function source_terms_poisson_nonperiodic(u, x, t, equations::HyperbolicDiffusionEquations2D) - # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω - # analytical solution: ϕ = 2cos(πx)sin(2πy) + 2 and f = 10π^2cos(πx)sin(2πy) - @unpack inv_Tr = equations - - x1, x2 = x - du1 = 10 * pi^2 * cospi(x1) * sinpi(2 * x2) - du2 = -inv_Tr * u[2] - du3 = -inv_Tr * u[3] - - return SVector(du1, du2, du3) -end - -@inline function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, +@inline function initial_condition_poisson_nonperiodic(x, t, + equations::HyperbolicDiffusionEquations2D) + # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω + if iszero(t) + T = eltype(x) + phi = one(T) + q1 = one(T) + q2 = one(T) + else + sinpi_x1, cospi_x1 = sincos(pi * x[1]) + sinpi_2x2, cospi_2x2 = sincos(pi * 2 * x[2]) + phi = 2 * cospi_x1 * sinpi_2x2 + 2 # ϕ + q1 = -2 * pi * sinpi_x1 * sinpi_2x2 # ϕ_x + q2 = 4 * pi * cospi_x1 * cospi_2x2 # ϕ_y + end + return SVector(phi, q1, q2) +end + +@inline function source_terms_poisson_nonperiodic(u, x, t, + equations::HyperbolicDiffusionEquations2D) + # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω + # analytical solution: ϕ = 2cos(πx)sin(2πy) + 2 and f = 10π^2cos(πx)sin(2πy) + @unpack inv_Tr = equations + + x1, x2 = x + du1 = 10 * pi^2 * cospi(x1) * sinpi(2 * x2) + du2 = -inv_Tr * u[2] + du3 = -inv_Tr * u[3] + + return SVector(du1, du2, du3) +end + +@inline function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, + x, t, surface_flux_function, equations::HyperbolicDiffusionEquations2D) - # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω - u_boundary = initial_condition_poisson_nonperiodic(x, one(t), equations) + # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω + u_boundary = initial_condition_poisson_nonperiodic(x, one(t), equations) - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, 
u_inner, orientation, equations) + end - return flux + return flux end - """ source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations2D) Source term that only includes the forcing from the hyperbolic diffusion system. """ -@inline function source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations2D) - # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 - @unpack inv_Tr = equations - phi, q1, q2 = u +@inline function source_terms_harmonic(u, x, t, + equations::HyperbolicDiffusionEquations2D) + # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 + @unpack inv_Tr = equations + phi, q1, q2 = u - du2 = -inv_Tr * q1 - du3 = -inv_Tr * q2 + du2 = -inv_Tr * q1 + du3 = -inv_Tr * q2 - return SVector(0, du2, du3) + return SVector(0, du2, du3) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations2D) @@ -108,136 +111,139 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`source_terms_harmonic`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations2D) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::HyperbolicDiffusionEquations2D) - # Determine phi_x, phi_y - G = 1.0 # gravitational constant - C = -2.0*G/pi - A = 0.1 # perturbation coefficient must match Euler setup - rho1 = A * sin(pi * (x[1] + x[2] - t)) - # initialize with ansatz of gravity potential - phi = C * rho1 - q1 = C * A * pi * cos(pi*(x[1] + x[2] - t)) # = gravity acceleration in x-direction - q2 = q1 # = gravity acceleration in y-direction + # Determine phi_x, phi_y + G = 1.0 # gravitational constant + C = -2.0 * G / pi + A = 0.1 # perturbation coefficient must match Euler setup + rho1 = A * sin(pi * (x[1] + x[2] - t)) + # initialize with ansatz of gravity potential + phi = C * rho1 + q1 = C * A * pi * cos(pi * (x[1] + x[2] - t)) # = gravity acceleration in x-direction + q2 = q1 # = gravity acceleration in y-direction - return SVector(phi, q1, q2) + return SVector(phi, q1, q2) end - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::HyperbolicDiffusionEquations2D) - phi, q1, q2 = u - @unpack inv_Tr = equations +@inline function flux(u, orientation::Integer, + equations::HyperbolicDiffusionEquations2D) + phi, q1, q2 = u + @unpack inv_Tr = equations - if orientation == 1 - f1 = -equations.nu*q1 - f2 = -phi * inv_Tr - f3 = zero(phi) - else - f1 = -equations.nu*q2 - f2 = zero(phi) - f3 = -phi * inv_Tr - end + if orientation == 1 + f1 = -equations.nu * q1 + f2 = -phi * inv_Tr + f3 = zero(phi) + else + f1 = -equations.nu * q2 + f2 = zero(phi) + f3 = -phi * inv_Tr + end - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::HyperbolicDiffusionEquations2D) - phi, q1, q2 = u - @unpack inv_Tr = equations +@inline function flux(u, normal_direction::AbstractVector, + equations::HyperbolicDiffusionEquations2D) + phi, q1, q2 = u + @unpack inv_Tr = equations - f1 = -equations.nu * (normal_direction[1] * q1 + normal_direction[2] * q2) - f2 = -phi * inv_Tr * normal_direction[1] - f3 = -phi * inv_Tr * normal_direction[2] + f1 = -equations.nu * (normal_direction[1] * q1 + normal_direction[2] * q2) + f2 = -phi * inv_Tr * normal_direction[1] + f3 = -phi * inv_Tr 
* normal_direction[2] - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations2D) - sqrt(equations.nu * equations.inv_Tr) -end - -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::HyperbolicDiffusionEquations2D) - sqrt(equations.nu * equations.inv_Tr) * norm(normal_direction) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations2D) + sqrt(equations.nu * equations.inv_Tr) +end + +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::HyperbolicDiffusionEquations2D) + sqrt(equations.nu * equations.inv_Tr) * norm(normal_direction) +end + +@inline function flux_godunov(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations2D) + # Obtain left and right fluxes + phi_ll, q1_ll, q2_ll = u_ll + phi_rr, q1_rr, q2_rr = u_rr + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # this is an optimized version of the application of the upwind dissipation matrix: + # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] + λ_max = sqrt(equations.nu * equations.inv_Tr) + f1 = 1 / 2 * (f_ll[1] + f_rr[1]) - 1 / 2 * λ_max * (phi_rr - phi_ll) + if orientation == 1 # x-direction + f2 = 1 / 2 * (f_ll[2] + f_rr[2]) - 1 / 2 * λ_max * (q1_rr - q1_ll) + f3 = 1 / 2 * (f_ll[3] + f_rr[3]) + else # y-direction + f2 = 1 / 2 * (f_ll[2] + f_rr[2]) + f3 = 1 / 2 * (f_ll[3] + f_rr[3]) - 1 / 2 * λ_max * (q2_rr - q2_ll) + end + + return SVector(f1, f2, f3) +end + +@inline function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, + equations::HyperbolicDiffusionEquations2D) + # Obtain left and right fluxes + phi_ll, q1_ll, q2_ll = u_ll + phi_rr, q1_rr, q2_rr = u_rr + f_ll = flux(u_ll, normal_direction, equations) + f_rr = flux(u_rr, normal_direction, equations) + + # this is an optimized version of the application of the upwind dissipation matrix: + # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] + λ_max = sqrt(equations.nu * equations.inv_Tr) + f1 = 1 / 2 * (f_ll[1] + f_rr[1]) - + 1 / 2 * λ_max * (phi_rr - phi_ll) * + sqrt(normal_direction[1]^2 + normal_direction[2]^2) + f2 = 1 / 2 * (f_ll[2] + f_rr[2]) - + 1 / 2 * λ_max * (q1_rr - q1_ll) * normal_direction[1] + f3 = 1 / 2 * (f_ll[3] + f_rr[3]) - + 1 / 2 * λ_max * (q2_rr - q2_ll) * normal_direction[2] + + return SVector(f1, f2, f3) end - -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations2D) - # Obtain left and right fluxes - phi_ll, q1_ll, q2_ll = u_ll - phi_rr, q1_rr, q2_rr = u_rr - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # this is an optimized version of the application of the upwind dissipation matrix: - # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] - λ_max = sqrt(equations.nu * equations.inv_Tr) - f1 = 1/2 * (f_ll[1] + f_rr[1]) - 1/2 * λ_max * (phi_rr - phi_ll) - if orientation == 1 # x-direction - f2 = 1/2 * (f_ll[2] + f_rr[2]) - 1/2 * λ_max * (q1_rr - q1_ll) - f3 = 1/2 * (f_ll[3] + f_rr[3]) - else # y-direction - f2 = 1/2 * (f_ll[2] + f_rr[2]) - f3 = 1/2 * (f_ll[3] + f_rr[3]) - 1/2 * λ_max * (q2_rr - q2_ll) - end - - return SVector(f1, f2, f3) -end - -@inline function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, equations::HyperbolicDiffusionEquations2D) - # Obtain left and right 
fluxes - phi_ll, q1_ll, q2_ll = u_ll - phi_rr, q1_rr, q2_rr = u_rr - f_ll = flux(u_ll, normal_direction, equations) - f_rr = flux(u_rr, normal_direction, equations) - - # this is an optimized version of the application of the upwind dissipation matrix: - # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] - λ_max = sqrt(equations.nu * equations.inv_Tr) - f1 = 1/2 * (f_ll[1] + f_rr[1]) - 1/2 * λ_max * (phi_rr - phi_ll) * sqrt(normal_direction[1]^2 + normal_direction[2]^2) - f2 = 1/2 * (f_ll[2] + f_rr[2]) - 1/2 * λ_max * (q1_rr - q1_ll) * normal_direction[1] - f3 = 1/2 * (f_ll[3] + f_rr[3]) - 1/2 * λ_max * (q2_rr - q2_ll) * normal_direction[2] - - return SVector(f1, f2, f3) -end - - - @inline have_constant_speed(::HyperbolicDiffusionEquations2D) = True() @inline function max_abs_speeds(eq::HyperbolicDiffusionEquations2D) - λ = sqrt(eq.nu * eq.inv_Tr) - return λ, λ + λ = sqrt(eq.nu * eq.inv_Tr) + return λ, λ end - # Convert conservative variables to primitive @inline cons2prim(u, equations::HyperbolicDiffusionEquations2D) = u # Convert conservative variables to entropy found in I Do Like CFD, Too, Vol. 1 @inline function cons2entropy(u, equations::HyperbolicDiffusionEquations2D) - phi, q1, q2 = u - w1 = phi - w2 = equations.Lr^2 * q1 - w3 = equations.Lr^2 * q2 + phi, q1, q2 = u + w1 = phi + w2 = equations.Lr^2 * q1 + w3 = equations.Lr^2 * q2 - return SVector(w1, w2, w3) + return SVector(w1, w2, w3) end - # Calculate entropy for a conservative state `u` (here: same as total energy) -@inline entropy(u, equations::HyperbolicDiffusionEquations2D) = energy_total(u, equations) - +@inline function entropy(u, equations::HyperbolicDiffusionEquations2D) + energy_total(u, equations) +end # Calculate total energy for a conservative state `u` @inline function energy_total(u, equations::HyperbolicDiffusionEquations2D) - # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" - phi, q1, q2 = u - return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2)) + # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" + phi, q1, q2 = u + return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2)) end - - end # @muladd diff --git a/src/equations/hyperbolic_diffusion_3d.jl b/src/equations/hyperbolic_diffusion_3d.jl index 2b4cfd95829..bf6a00140d4 100644 --- a/src/equations/hyperbolic_diffusion_3d.jl +++ b/src/equations/hyperbolic_diffusion_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" HyperbolicDiffusionEquations3D @@ -13,21 +13,27 @@ A description of this system can be found in Sec. 2.5 of the book "I Do Like CFD The book is freely available at http://www.cfdbooks.com/ and further analysis can be found in the paper by Nishikawa [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ -struct HyperbolicDiffusionEquations3D{RealT<:Real} <: AbstractHyperbolicDiffusionEquations{3, 4} - Lr::RealT # reference length scale - inv_Tr::RealT # inverse of the reference time scale - nu::RealT # diffusion constant +struct HyperbolicDiffusionEquations3D{RealT <: Real} <: + AbstractHyperbolicDiffusionEquations{3, 4} + Lr::RealT # reference length scale + inv_Tr::RealT # inverse of the reference time scale + nu::RealT # diffusion constant end -function HyperbolicDiffusionEquations3D(; nu=1.0, Lr=inv(2pi)) - Tr = Lr^2 / nu - HyperbolicDiffusionEquations3D(promote(Lr, inv(Tr), nu)...) 
+function HyperbolicDiffusionEquations3D(; nu = 1.0, Lr = inv(2pi)) + Tr = Lr^2 / nu + HyperbolicDiffusionEquations3D(promote(Lr, inv(Tr), nu)...) end - -varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations3D) = ("phi", "q1", "q2", "q3") -varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations3D) = ("phi", "q1", "q2", "q3") -default_analysis_errors(::HyperbolicDiffusionEquations3D) = (:l2_error, :linf_error, :residual) +function varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations3D) + ("phi", "q1", "q2", "q3") +end +function varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations3D) + ("phi", "q1", "q2", "q3") +end +function default_analysis_errors(::HyperbolicDiffusionEquations3D) + (:l2_error, :linf_error, :residual) +end """ residual_steady_state(du, ::AbstractHyperbolicDiffusionEquations) @@ -36,80 +42,80 @@ Used to determine the termination criterion of a [`SteadyStateCallback`](@ref). For hyperbolic diffusion, this checks convergence of the potential ``\\phi``. """ @inline function residual_steady_state(du, ::HyperbolicDiffusionEquations3D) - abs(du[1]) + abs(du[1]) end - # Set initial conditions at physical location `x` for pseudo-time `t` -function initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations3D) - # elliptic equation: -νΔϕ = f - if t == 0.0 - phi = 1.0 - q1 = 1.0 - q2 = 1.0 - q3 = 1.0 - else - phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ - q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x - q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y - q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z - end - return SVector(phi, q1, q2, q3) -end - -@inline function source_terms_poisson_nonperiodic(u, x, t, equations::HyperbolicDiffusionEquations3D) - # elliptic equation: -νΔϕ = f - # analytical solution: ϕ = 2 cos(πx)sin(2πy)sin(2πz) + 2 and f = 18 π^2 cos(πx)sin(2πy)sin(2πz) - @unpack inv_Tr = equations - - x1, x2, x3 = x - du1 = 18 * pi^2 * cospi(x1) * sinpi(2 * x2) * sinpi(2 * x3) - du2 = -inv_Tr * u[2] - du3 = -inv_Tr * u[3] - du4 = -inv_Tr * u[4] - - return SVector(du1, du2, du3, du4) +function initial_condition_poisson_nonperiodic(x, t, + equations::HyperbolicDiffusionEquations3D) + # elliptic equation: -νΔϕ = f + if t == 0.0 + phi = 1.0 + q1 = 1.0 + q2 = 1.0 + q3 = 1.0 + else + phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ + q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x + q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y + q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z + end + return SVector(phi, q1, q2, q3) end -function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, - surface_flux_function, - equations::HyperbolicDiffusionEquations3D) - # elliptic equation: -νΔϕ = f - phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ - q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x - q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y - q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z - u_boundary = SVector(phi, q1, q2, q3) +@inline function source_terms_poisson_nonperiodic(u, x, t, + equations::HyperbolicDiffusionEquations3D) + # elliptic equation: -νΔϕ = f + # analytical solution: ϕ = 2 
cos(πx)sin(2πy)sin(2πz) + 2 and f = 18 π^2 cos(πx)sin(2πy)sin(2πz) + @unpack inv_Tr = equations - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end + x1, x2, x3 = x + du1 = 18 * pi^2 * cospi(x1) * sinpi(2 * x2) * sinpi(2 * x3) + du2 = -inv_Tr * u[2] + du3 = -inv_Tr * u[3] + du4 = -inv_Tr * u[4] - return flux + return SVector(du1, du2, du3, du4) end +function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, + surface_flux_function, + equations::HyperbolicDiffusionEquations3D) + # elliptic equation: -νΔϕ = f + phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ + q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x + q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y + q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z + u_boundary = SVector(phi, q1, q2, q3) + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + + return flux +end """ source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations3D) Source term that only includes the forcing from the hyperbolic diffusion system. """ -@inline function source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations3D) - # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 - @unpack inv_Tr = equations +@inline function source_terms_harmonic(u, x, t, + equations::HyperbolicDiffusionEquations3D) + # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 + @unpack inv_Tr = equations - du1 = zero(u[1]) - du2 = -inv_Tr * u[2] - du3 = -inv_Tr * u[3] - du4 = -inv_Tr * u[4] + du1 = zero(u[1]) + du2 = -inv_Tr * u[2] + du3 = -inv_Tr * u[3] + du4 = -inv_Tr * u[4] - return SVector(du1, du2, du3, du4) + return SVector(du1, du2, du3, du4) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations3D) @@ -119,119 +125,113 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`source_terms_harmonic`](@ref). 
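
A usage sketch (here, `mesh` and `solver` are placeholders for any 3D mesh and DG
solver already set up elsewhere):

```julia
equations = HyperbolicDiffusionEquations3D()
semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_eoc_test_coupled_euler_gravity,
                                    solver, source_terms = source_terms_harmonic)
```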
""" -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations3D) - - # Determine phi_x, phi_y - G = 1.0 # gravitational constant - C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi - A = 0.1 # perturbation coefficient must match Euler setup - rho1 = A * sin(pi * (x[1] + x[2] + x[3] - t)) - # initialize with ansatz of gravity potential - phi = C_grav * rho1 - q1 = C_grav * A * pi * cos(pi*(x[1] + x[2] + x[3] - t)) # = gravity acceleration in x-direction - q2 = q1 # = gravity acceleration in y-direction - q3 = q1 # = gravity acceleration in z-direction - - return SVector(phi, q1, q2, q3) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::HyperbolicDiffusionEquations3D) + + # Determine phi_x, phi_y + G = 1.0 # gravitational constant + C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi + A = 0.1 # perturbation coefficient must match Euler setup + rho1 = A * sin(pi * (x[1] + x[2] + x[3] - t)) + # initialize with ansatz of gravity potential + phi = C_grav * rho1 + q1 = C_grav * A * pi * cos(pi * (x[1] + x[2] + x[3] - t)) # = gravity acceleration in x-direction + q2 = q1 # = gravity acceleration in y-direction + q3 = q1 # = gravity acceleration in z-direction + + return SVector(phi, q1, q2, q3) end - - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::HyperbolicDiffusionEquations3D) - phi, q1, q2, q3 = u - - if orientation == 1 - f1 = -equations.nu*q1 - f2 = -phi * equations.inv_Tr - f3 = zero(phi) - f4 = zero(phi) - elseif orientation == 2 - f1 = -equations.nu*q2 - f2 = zero(phi) - f3 = -phi * equations.inv_Tr - f4 = zero(phi) - else - f1 = -equations.nu*q3 - f2 = zero(phi) - f3 = zero(phi) - f4 = -phi * equations.inv_Tr - end - - return SVector(f1, f2, f3, f4) +@inline function flux(u, orientation::Integer, + equations::HyperbolicDiffusionEquations3D) + phi, q1, q2, q3 = u + + if orientation == 1 + f1 = -equations.nu * q1 + f2 = -phi * equations.inv_Tr + f3 = zero(phi) + f4 = zero(phi) + elseif orientation == 2 + f1 = -equations.nu * q2 + f2 = zero(phi) + f3 = -phi * equations.inv_Tr + f4 = zero(phi) + else + f1 = -equations.nu * q3 + f2 = zero(phi) + f3 = zero(phi) + f4 = -phi * equations.inv_Tr + end + + return SVector(f1, f2, f3, f4) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations3D) - λ_max = sqrt(equations.nu * equations.inv_Tr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations3D) + λ_max = sqrt(equations.nu * equations.inv_Tr) end - -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations3D) - # Obtain left and right fluxes - phi_ll, q1_ll, q2_ll, q3_ll = u_ll - phi_rr, q1_rr, q2_rr, q3_rr = u_rr - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # this is an optimized version of the application of the upwind dissipation matrix: - # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] - λ_max = sqrt(equations.nu * equations.inv_Tr) - f1 = 1/2 * (f_ll[1] + f_rr[1]) - 1/2 * λ_max * (phi_rr - phi_ll) - if orientation == 1 # x-direction - f2 = 1/2 * (f_ll[2] + f_rr[2]) - 1/2 * λ_max * (q1_rr - q1_ll) - f3 = 1/2 * (f_ll[3] + f_rr[3]) - f4 = 1/2 * (f_ll[4] + f_rr[4]) - elseif orientation == 2 # y-direction - f2 = 1/2 * 
(f_ll[2] + f_rr[2])
-    f3 = 1/2 * (f_ll[3] + f_rr[3]) - 1/2 * λ_max * (q2_rr - q2_ll)
-    f4 = 1/2 * (f_ll[4] + f_rr[4])
-  else # y-direction
-    f2 = 1/2 * (f_ll[2] + f_rr[2])
-    f3 = 1/2 * (f_ll[3] + f_rr[3])
-    f4 = 1/2 * (f_ll[4] + f_rr[4]) - 1/2 * λ_max * (q3_rr - q3_ll)
-  end
-
-  return SVector(f1, f2, f3, f4)
+@inline function flux_godunov(u_ll, u_rr, orientation::Integer,
+                              equations::HyperbolicDiffusionEquations3D)
+    # Obtain left and right fluxes
+    phi_ll, q1_ll, q2_ll, q3_ll = u_ll
+    phi_rr, q1_rr, q2_rr, q3_rr = u_rr
+    f_ll = flux(u_ll, orientation, equations)
+    f_rr = flux(u_rr, orientation, equations)
+
+    # this is an optimized version of the application of the upwind dissipation matrix:
+    # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]]
+    λ_max = sqrt(equations.nu * equations.inv_Tr)
+    f1 = 1 / 2 * (f_ll[1] + f_rr[1]) - 1 / 2 * λ_max * (phi_rr - phi_ll)
+    if orientation == 1 # x-direction
+        f2 = 1 / 2 * (f_ll[2] + f_rr[2]) - 1 / 2 * λ_max * (q1_rr - q1_ll)
+        f3 = 1 / 2 * (f_ll[3] + f_rr[3])
+        f4 = 1 / 2 * (f_ll[4] + f_rr[4])
+    elseif orientation == 2 # y-direction
+        f2 = 1 / 2 * (f_ll[2] + f_rr[2])
+        f3 = 1 / 2 * (f_ll[3] + f_rr[3]) - 1 / 2 * λ_max * (q2_rr - q2_ll)
+        f4 = 1 / 2 * (f_ll[4] + f_rr[4])
+    else # z-direction
+        f2 = 1 / 2 * (f_ll[2] + f_rr[2])
+        f3 = 1 / 2 * (f_ll[3] + f_rr[3])
+        f4 = 1 / 2 * (f_ll[4] + f_rr[4]) - 1 / 2 * λ_max * (q3_rr - q3_ll)
+    end
+
+    return SVector(f1, f2, f3, f4)
end
-
-
@inline have_constant_speed(::HyperbolicDiffusionEquations3D) = True()

@inline function max_abs_speeds(eq::HyperbolicDiffusionEquations3D)
-  λ = sqrt(eq.nu * eq.inv_Tr)
-  return λ, λ, λ
+    λ = sqrt(eq.nu * eq.inv_Tr)
+    return λ, λ, λ
end

-
# Convert conservative variables to primitive
@inline cons2prim(u, equations::HyperbolicDiffusionEquations3D) = u

-
# Convert conservative variables to entropy found in I Do Like CFD, Too, Vol. 1
@inline function cons2entropy(u, equations::HyperbolicDiffusionEquations3D)
-  phi, q1, q2, q3 = u
-  w1 = phi
-  w2 = equations.Lr^2 * q1
-  w3 = equations.Lr^2 * q2
-  w4 = equations.Lr^2 * q3
+    phi, q1, q2, q3 = u
+    w1 = phi
+    w2 = equations.Lr^2 * q1
+    w3 = equations.Lr^2 * q2
+    w4 = equations.Lr^2 * q3

-  return SVector(w1, w2, w3, w4)
+    return SVector(w1, w2, w3, w4)
end

-
# Calculate entropy for a conservative state `u` (here: same as total energy)
-@inline entropy(u, equations::HyperbolicDiffusionEquations3D) = energy_total(u, equations)
-
+@inline function entropy(u, equations::HyperbolicDiffusionEquations3D)
+    energy_total(u, equations)
+end

# Calculate total energy for a conservative state `u`
@inline function energy_total(u, equations::HyperbolicDiffusionEquations3D)
-  # energy function as found in equation (2.5.12) in the book "I Do Like CFD, Vol. 1"
-  phi, q1, q2, q3 = u
-  return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2 + q3^2))
+    # energy function as found in equation (2.5.12) in the book "I Do Like CFD, Vol. 1"
+    phi, q1, q2, q3 = u
+    return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2 + q3^2))
end
-
-
end # @muladd
diff --git a/src/equations/ideal_glm_mhd_1d.jl b/src/equations/ideal_glm_mhd_1d.jl
index 980ff24d9ef..4ef593cda53 100644
--- a/src/equations/ideal_glm_mhd_1d.jl
+++ b/src/equations/ideal_glm_mhd_1d.jl
@@ -3,7 +3,7 @@
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
-
+#! format: noindent

@doc raw"""
    IdealGlmMhdEquations1D(gamma)
@@ -15,21 +15,26 @@ specific heats `gamma` in one space dimension.
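For example, a construction sketch (the value of `gamma` is illustrative; the Alfvén
wave convergence test below assumes `γ = 5/3`):

```julia
equations = IdealGlmMhdEquations1D(5 / 3)
```
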
There is no divergence cleaning variable `psi` because the divergence-free constraint is satisfied trivially in one spatial dimension. """ -struct IdealGlmMhdEquations1D{RealT<:Real} <: AbstractIdealGlmMhdEquations{1, 8} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function IdealGlmMhdEquations1D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct IdealGlmMhdEquations1D{RealT <: Real} <: AbstractIdealGlmMhdEquations{1, 8} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function IdealGlmMhdEquations1D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end have_nonconservative_terms(::IdealGlmMhdEquations1D) = False() -varnames(::typeof(cons2cons), ::IdealGlmMhdEquations1D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") -varnames(::typeof(cons2prim), ::IdealGlmMhdEquations1D) = ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3") -default_analysis_integrals(::IdealGlmMhdEquations1D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function varnames(::typeof(cons2cons), ::IdealGlmMhdEquations1D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") +end +function varnames(::typeof(cons2prim), ::IdealGlmMhdEquations1D) + ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3") +end +function default_analysis_integrals(::IdealGlmMhdEquations1D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end """ initial_condition_constant(x, t, equations::IdealGlmMhdEquations1D) @@ -37,40 +42,38 @@ default_analysis_integrals(::IdealGlmMhdEquations1D) = (entropy_timederivative, A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::IdealGlmMhdEquations1D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = -0.5 - rho_e = 50.0 - B1 = 3.0 - B2 = -1.2 - B3 = 0.5 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = -0.5 + rho_e = 50.0 + B1 = 3.0 + B2 = -1.2 + B3 = 0.5 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) end - """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations1D) An Alfvén wave as smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations1D) - # smooth Alfvén wave test from Derigs et al. FLASH (2016) - # domain must be set to [0, 1], γ = 5/3 - rho = 1.0 - v1 = 0.0 - # TODO: sincospi - si, co = sincos(2 * pi * x[1]) - v2 = 0.1 * si - v3 = 0.1 * co - p = 0.1 - B1 = 1.0 - B2 = v2 - B3 = v3 - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3), equations) + # smooth Alfvén wave test from Derigs et al. 
FLASH (2016)
+    # domain must be set to [0, 1], γ = 5/3
+    rho = 1.0
+    v1 = 0.0
+    # TODO: sincospi
+    si, co = sincos(2 * pi * x[1])
+    v2 = 0.1 * si
+    v3 = 0.1 * co
+    p = 0.1
+    B1 = 1.0
+    B2 = v2
+    B3 = v3
+    return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3), equations)
end

-
"""
    initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations1D)
@@ -80,48 +83,47 @@ A weak blast wave adapted from
[arXiv: 2008.12044](https://arxiv.org/abs/2008.12044)
"""
function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations1D)
-  # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
-  # Same discontinuity in the velocities but with magnetic fields
-  # Set up polar coordinates
-  inicenter = (0,)
-  x_norm = x[1] - inicenter[1]
-  r = sqrt(x_norm^2)
-  phi = atan(x_norm)
-
-  # Calculate primitive variables
-  rho = r > 0.5 ? 1.0 : 1.1691
-  v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi)
-  p = r > 0.5 ? 1.0 : 1.245
-
-  return prim2cons(SVector(rho, v1, 0.0, 0.0, p, 1.0, 1.0, 1.0, 0.0), equations)
+    # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
+    # Same discontinuity in the velocities but with magnetic fields
+    # Set up polar coordinates
+    inicenter = (0,)
+    x_norm = x[1] - inicenter[1]
+    r = sqrt(x_norm^2)
+    phi = atan(x_norm)
+
+    # Calculate primitive variables
+    rho = r > 0.5 ? 1.0 : 1.1691
+    v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi)
+    p = r > 0.5 ? 1.0 : 1.245
+
+    return prim2cons(SVector(rho, v1, 0.0, 0.0, p, 1.0, 1.0, 1.0), equations)
end

-
# Calculate the 1D flux for a single point
@inline function flux(u, orientation::Integer, equations::IdealGlmMhdEquations1D)
-  rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u
-  v1 = rho_v1 / rho
-  v2 = rho_v2 / rho
-  v3 = rho_v3 / rho
-  kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)
-  mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3)
-  p_over_gamma_minus_one = (rho_e - kin_en - mag_en)
-  p = (equations.gamma - 1) * p_over_gamma_minus_one
-
-  # Ignore orientation since it is always "1" in 1D
-  f1 = rho_v1
-  f2 = rho_v1*v1 + p + mag_en - B1^2
-  f3 = rho_v1*v2 - B1*B2
-  f4 = rho_v1*v3 - B1*B3
-  f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3)
-  f6 = 0.0
-  f7 = v1*B2 - v2*B1
-  f8 = v1*B3 - v3*B1
-
-  return SVector(f1, f2, f3, f4, f5, f6, f7, f8)
+    rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)
+    mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3)
+    p_over_gamma_minus_one = (rho_e - kin_en - mag_en)
+    p = (equations.gamma - 1) * p_over_gamma_minus_one
+
+    # Ignore orientation since it is always "1" in 1D
+    f1 = rho_v1
+    f2 = rho_v1 * v1 + p + mag_en - B1^2
+    f3 = rho_v1 * v2 - B1 * B2
+    f4 = rho_v1 * v3 - B1 * B3
+    f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v1 -
+         B1 * (v1 * B1 + v2 * B2 + v3 * B3)
+    f6 = 0.0
+    f7 = v1 * B2 - v2 * B1
+    f8 = v1 * B3 - v3 * B1
+
+    return SVector(f1, f2, f3, f4, f5, f6, f7, f8)
end

-
"""
    flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations1D)
@@ -131,63 +133,66 @@ Entropy conserving two-point flux by
divergence diminishing ideal magnetohydrodynamics equations
[DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002)
"""
-function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D)
-  # Unpack left and right states to get velocities, pressure, and inverse 
temperature (called beta) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr - - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - p_ll = (equations.gamma - 1)*(rho_e_ll - 0.5*rho_ll*vel_norm_ll - 0.5*mag_norm_ll) - p_rr = (equations.gamma - 1)*(rho_e_rr - 0.5*rho_rr*vel_norm_rr - 0.5*mag_norm_rr) - beta_ll = 0.5*rho_ll/p_ll - beta_rr = 0.5*rho_rr/p_rr - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - rho_avg = 0.5*(rho_ll+rho_rr) - rho_mean = ln_mean(rho_ll,rho_rr) - beta_mean = ln_mean(beta_ll,beta_rr) - beta_avg = 0.5*(beta_ll+beta_rr) - v1_avg = 0.5*(v1_ll+v1_rr) - v2_avg = 0.5*(v2_ll+v2_rr) - v3_avg = 0.5*(v3_ll+v3_rr) - p_mean = 0.5*rho_avg/beta_avg - B1_avg = 0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - # Ignore orientation since it is always "1" in 1D - f1 = rho_mean*v1_avg - f2 = f1*v1_avg + p_mean + 0.5*mag_norm_avg - B1_avg*B1_avg - f3 = f1*v2_avg - B1_avg*B2_avg - f4 = f1*v3_avg - B1_avg*B3_avg - f6 = 0.0 - f7 = v1_avg*B2_avg - v2_avg*B1_avg - f8 = v1_avg*B3_avg - v3_avg*B1_avg - # total energy flux is complicated and involves the previous eight components - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg - 0.5*v1_mag_avg + - B1_avg*vel_dot_mag_avg) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + p_ll = (equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * vel_norm_ll - 0.5 * mag_norm_ll) + p_rr = (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * vel_norm_rr - 0.5 * mag_norm_rr) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * 
(v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + # Ignore orientation since it is always "1" in 1D + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + 0.5 * mag_norm_avg - B1_avg * B1_avg + f3 = f1 * v2_avg - B1_avg * B2_avg + f4 = f1 * v3_avg - B1_avg * B3_avg + f6 = 0.0 + f7 = v1_avg * B2_avg - v2_avg * B1_avg + f8 = v1_avg * B3_avg - v3_avg * B1_avg + # total energy flux is complicated and involves the previous eight components + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg - 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations1D) @@ -210,65 +215,68 @@ Hindenlang and Gassner (2019), extending [`flux_ranocha`](@ref) to the MHD equat the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on orientation with specific direction averages - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = 0.0 - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) ) ) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, 
equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on orientation with specific direction averages + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = 0.0 + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll))) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D) - rho_ll, rho_v1_ll, _ = u_ll - rho_rr, rho_v1_rr, _ = u_rr - - # Calculate velocities (ignore orientation since it is always "1" in 1D) - # and fast magnetoacoustic wave speeds - # left - v_ll = rho_v1_ll / rho_ll - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - # right - v_rr = rho_v1_rr / rho_rr - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + rho_ll, rho_v1_ll, _ = u_ll + rho_rr, rho_v1_rr, _ = u_rr + + # Calculate velocities (ignore orientation since it is always "1" in 1D) + # and fast magnetoacoustic wave speeds + # left + v_ll = rho_v1_ll / rho_ll + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + # right + v_rr = rho_v1_rr / rho_rr + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end - """ min_max_speed_naive(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations1D) @@ -277,127 +285,128 @@ Calculate minimum and maximum wave speeds for HLL-type fluxes as in An HLLC Riemann solver for magneto-hydrodynamics [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020) """ -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D) - rho_ll, rho_v1_ll, _ = u_ll - rho_rr, rho_v1_rr, _ = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v1_rr = rho_v1_rr/rho_rr - - # Approximate the left-most and right-most eigenvalues in the Riemann fan - c_f_ll = 
calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + rho_ll, rho_v1_ll, _ = u_ll + rho_rr, rho_v1_rr, _ = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + + # Approximate the left-most and right-most eigenvalues in the Riemann fan + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) + + return λ_min, λ_max end - @inline function max_abs_speeds(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, _ = u - v1 = rho_v1 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) + rho, rho_v1, _ = u + v1 = rho_v1 / rho + cf_x_direction = calc_fast_wavespeed(u, 1, equations) - return abs(v1) + cf_x_direction + return abs(v1) + cf_x_direction end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 - + B1 * B1 + B2 * B2 + B3 * B3)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * (rho_e - + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 + + B1 * B1 + B2 * B2 + B3 * B3)) - return SVector(rho, v1, v2, v3, p, B1, B2, B3) + return SVector(rho, v1, v2, v3, p, B1, B2, B3) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2)) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) / (equations.gamma-1) - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = rho_p * v3 - w5 = -rho_p - w6 = rho_p * B1 - w7 = rho_p * B2 - w8 = rho_p * B3 - - return SVector(w1, w2, w3, w4, w5, w6, w7, w8) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2)) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p + + w1 = (equations.gamma - s) / (equations.gamma - 1) - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = rho_p * v3 + w5 = -rho_p + w6 = rho_p * B1 + w7 = rho_p * B2 + w8 = rho_p * B3 + + return SVector(w1, w2, w3, w4, w5, w6, w7, w8) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdEquations1D) - rho, v1, v2, v3, p, B1, B2, B3 = prim + rho, v1, v2, v3, p, B1, B2, B3 = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p/(equations.gamma-1) + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 
* (B1^2 + B2^2 + B3^2) + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p / (equations.gamma - 1) + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) end - @inline function density(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - return rho + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + return rho end @inline function pressure(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2)) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2)) + return p end @inline function density_pressure(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2)) - return rho * p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2)) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue @inline function calc_fast_wavespeed(cons, direction, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_mag = sqrt(v1^2 + v2^2 + v3^2) - p = (equations.gamma - 1)*(rho_e - 0.5*rho*v_mag^2 - 0.5*(B1^2 + B2^2 + B3^2)) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1^2 + b2^2 + b3^2 - - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - return c_f + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_mag = sqrt(v1^2 + v2^2 + v3^2) + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_mag^2 - 0.5 * (B1^2 + B2^2 + B3^2)) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1^2 + b2^2 + b3^2 + + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + return c_f end - """ calc_fast_wavespeed_roe(u_ll, u_rr, direction, equations::IdealGlmMhdEquations1D) @@ -408,119 +417,118 @@ as given by of Roe Matrices for Systems of Conservation Laws [DOI: 10.1006/jcph.1997.5773](https://doi.org/10.1006/jcph.1997.5773) """ -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, direction, equations::IdealGlmMhdEquations1D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - p_ll = (equations.gamma - 1)*(rho_e_ll - 0.5*rho_ll*vel_norm_ll - 0.5*mag_norm_ll) - - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - 
mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - p_rr = (equations.gamma - 1)*(rho_e_rr - 0.5*rho_rr*vel_norm_rr - 0.5*mag_norm_rr) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5*mag_norm_ll - p_total_rr = p_rr + 0.5*mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equations (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - # finally compute the average wave speed and set the output velocity - # Ignore orientation since it is always "1" in 1D - c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - - return v1_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, direction, + equations::IdealGlmMhdEquations1D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + p_ll = (equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * vel_norm_ll - 0.5 * mag_norm_ll) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + p_rr = (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * vel_norm_rr - 0.5 * mag_norm_rr) + + # compute total pressure which is thermal + magnetic pressures + p_total_ll = p_ll + 0.5 * mag_norm_ll + p_total_rr = p_rr + 0.5 * mag_norm_rr + + # compute the Roe density averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) + inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) + rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add + rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add + # Roe averages + # velocities and magnetic fields + v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe + v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe + v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe + B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe + B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe + B3_roe = B3_ll * 
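+    # Note: rho_ll_roe + rho_rr_roe = 1 by construction, so the Roe averages
+    # here are convex combinations of the left and right states, weighted by
+    # sqrt(rho).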
rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable, see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+    # finally compute the average wave speed and set the output velocity
+    # Ignore orientation since it is always "1" in 1D
+    c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+    a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe)
+    c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+
+    return v1_roe, c_f_roe
 end

-
 # Calculate thermodynamic entropy for a conservative state `cons`
 @inline function entropy_thermodynamic(cons, equations::IdealGlmMhdEquations1D)
-  # Pressure
-  p = (equations.gamma - 1) * (cons[5] - 1/2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
-                                       - 1/2 * (cons[6]^2 + cons[7]^2 + cons[8]^2))
+    # Pressure
+    p = (equations.gamma - 1) *
+        (cons[5] - 1 / 2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
+         -
+         1 / 2 * (cons[6]^2 + cons[7]^2 + cons[8]^2))

-  # Thermodynamic entropy
-  s = log(p) - equations.gamma*log(cons[1])
+    # Thermodynamic entropy
+    s = log(p) - equations.gamma * log(cons[1])

-  return s
+    return s
 end

-
 # Calculate mathematical entropy for a conservative state `cons`
 @inline function entropy_math(cons, equations::IdealGlmMhdEquations1D)
-  S = -entropy_thermodynamic(cons, equations) * cons[1] / (equations.gamma - 1)
+    S = -entropy_thermodynamic(cons, equations) * cons[1] / (equations.gamma - 1)

-  return S
+    return S
 end

-
 # Default entropy is the mathematical entropy
 @inline entropy(cons, equations::IdealGlmMhdEquations1D) = entropy_math(cons, equations)

-
 # Calculate total energy for a conservative state `cons`
 @inline energy_total(cons, ::IdealGlmMhdEquations1D) = cons[5]

-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(cons, equations::IdealGlmMhdEquations1D)
-  return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2)/cons[1]
+    return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
 end

-
 # Calculate the magnetic energy for a conservative state `cons`.
 # OBS! For the non-dimensional form of the ideal MHD, magnetic pressure ≡ magnetic energy
 @inline function energy_magnetic(cons, ::IdealGlmMhdEquations1D)
-  return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+    return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
 end

-
 # Calculate internal energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::IdealGlmMhdEquations1D)
-  return (energy_total(cons, equations)
-          - energy_kinetic(cons, equations)
-          - energy_magnetic(cons, equations))
+    return (energy_total(cons, equations)
+            -
+            energy_kinetic(cons, equations)
+            -
+            energy_magnetic(cons, equations))
 end

-
 # Calculate the cross helicity (\vec{v}⋅\vec{B}) for a conservative state `cons`
 @inline function cross_helicity(cons, ::IdealGlmMhdEquations1D)
-  return (cons[2]*cons[6] + cons[3]*cons[7] + cons[4]*cons[8]) / cons[1]
+    return (cons[2] * cons[6] + cons[3] * cons[7] + cons[4] * cons[8]) / cons[1]
 end
-
-
 end # @muladd
diff --git a/src/equations/ideal_glm_mhd_2d.jl b/src/equations/ideal_glm_mhd_2d.jl
index c19273737ef..fb3048fe883 100644
--- a/src/equations/ideal_glm_mhd_2d.jl
+++ b/src/equations/ideal_glm_mhd_2d.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 @doc raw"""
     IdealGlmMhdEquations2D(gamma)

The ideal compressible GLM-MHD equations for an ideal gas with ratio of
specific heats `gamma` in two space dimensions.
"""
-mutable struct IdealGlmMhdEquations2D{RealT<:Real} <: AbstractIdealGlmMhdEquations{2, 9}
-  gamma::RealT # ratio of specific heats
-  inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
-  c_h::RealT # GLM cleaning speed
-
-  function IdealGlmMhdEquations2D(gamma, c_h)
-    γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
-    new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
-  end
+mutable struct IdealGlmMhdEquations2D{RealT <: Real} <:
+               AbstractIdealGlmMhdEquations{2, 9}
+    gamma::RealT # ratio of specific heats
+    inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
+    c_h::RealT # GLM cleaning speed
+
+    function IdealGlmMhdEquations2D(gamma, c_h)
+        γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
+        new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
+    end
 end

-function IdealGlmMhdEquations2D(gamma; initial_c_h=convert(typeof(gamma), NaN))
-  # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
-  IdealGlmMhdEquations2D(promote(gamma, initial_c_h)...)
+function IdealGlmMhdEquations2D(gamma; initial_c_h = convert(typeof(gamma), NaN))
+    # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
+    IdealGlmMhdEquations2D(promote(gamma, initial_c_h)...)
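+    # Typical usage (illustrative sketch) constructs the equations with `gamma`
+    # only, e.g.
+    #   equations = IdealGlmMhdEquations2D(5/3)
+    # so that `c_h` starts as NaN and is set during the run, usually by Trixi.jl's
+    # `GlmSpeedCallback` based on the CFL condition.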
end - have_nonconservative_terms(::IdealGlmMhdEquations2D) = True() -varnames(::typeof(cons2cons), ::IdealGlmMhdEquations2D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") -varnames(::typeof(cons2prim), ::IdealGlmMhdEquations2D) = ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") -default_analysis_integrals(::IdealGlmMhdEquations2D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function varnames(::typeof(cons2cons), ::IdealGlmMhdEquations2D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") +end +function varnames(::typeof(cons2prim), ::IdealGlmMhdEquations2D) + ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") +end +function default_analysis_integrals(::IdealGlmMhdEquations2D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end # Set initial conditions at physical location `x` for time `t` """ @@ -41,43 +46,41 @@ default_analysis_integrals(::IdealGlmMhdEquations2D) = (entropy_timederivative, A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::IdealGlmMhdEquations2D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = -0.5 - rho_e = 50.0 - B1 = 3.0 - B2 = -1.2 - B3 = 0.5 - psi = 0.0 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = -0.5 + rho_e = 50.0 + B1 = 3.0 + B2 = -1.2 + B3 = 0.5 + psi = 0.0 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations2D) An Alfvén wave as smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations2D) - # smooth Alfvén wave test from Derigs et al. FLASH (2016) - # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 - alpha = 0.25*pi - x_perp = x[1]*cos(alpha) + x[2]*sin(alpha) - B_perp = 0.1*sin(2.0*pi*x_perp) - rho = 1.0 - v1 = -B_perp*sin(alpha) - v2 = B_perp*cos(alpha) - v3 = 0.1*cos(2.0*pi*x_perp) - p = 0.1 - B1 = cos(alpha) + v1 - B2 = sin(alpha) + v2 - B3 = v3 - psi = 0.0 - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) + # smooth Alfvén wave test from Derigs et al. FLASH (2016) + # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 + alpha = 0.25 * pi + x_perp = x[1] * cos(alpha) + x[2] * sin(alpha) + B_perp = 0.1 * sin(2.0 * pi * x_perp) + rho = 1.0 + v1 = -B_perp * sin(alpha) + v2 = B_perp * cos(alpha) + v3 = 0.1 * cos(2.0 * pi * x_perp) + p = 0.1 + B1 = cos(alpha) + v1 + B2 = sin(alpha) + v2 + B3 = v3 + psi = 0.0 + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) end - """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations2D) @@ -87,96 +90,98 @@ A weak blast wave adapted from [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations2D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = (0, 0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) - v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) - p = r > 0.5 ? 
1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0), equations) + # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Same discontinuity in the velocities but with magnetic fields + # Set up polar coordinates + inicenter = (0, 0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) + v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0), equations) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::IdealGlmMhdEquations2D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1*v1 + p + mag_en - B1^2 - f3 = rho_v1*v2 - B1*B2 - f4 = rho_v1*v3 - B1*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B1 - f6 = equations.c_h*psi - f7 = v1*B2 - v2*B1 - f8 = v1*B3 - v3*B1 - f9 = equations.c_h*B1 - else #if orientation == 2 - f1 = rho_v2 - f2 = rho_v2*v1 - B2*B1 - f3 = rho_v2*v2 + p + mag_en - B2^2 - f4 = rho_v2*v3 - B2*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v2 - B2*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B2 - f6 = v2*B1 - v1*B2 - f7 = equations.c_h*psi - f8 = v2*B3 - v3*B2 - f9 = equations.c_h*B2 - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + mag_en - B1^2 + f3 = rho_v1 * v2 - B1 * B2 + f4 = rho_v1 * v3 - B1 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v1 - + B1 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B1 + f6 = equations.c_h * psi + f7 = v1 * B2 - v2 * B1 + f8 = v1 * B3 - v3 * B1 + f9 = equations.c_h * B1 + else #if orientation == 2 + f1 = rho_v2 + f2 = rho_v2 * v1 - B2 * B1 + f3 = rho_v2 * v2 + p + mag_en - B2^2 + f4 = rho_v2 * v3 - B2 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v2 - + B2 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B2 + f6 = v2 * B1 - v1 * B2 + f7 = equations.c_h * psi + f8 = v2 * B3 - v3 * B2 + f9 = equations.c_h * B2 + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 
* (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] - B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] - rho_v_normal = rho * v_normal - - f1 = rho_v_normal - f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] - f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] - f4 = rho_v_normal * v3 - B3 * B_normal - f5 = ( (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en) * v_normal - - B_normal * (v1*B1 + v2*B2 + v3*B3) + equations.c_h * psi * B_normal ) - f6 = equations.c_h * psi * normal_direction[1] + (v2 * B1 - v1 * B2) * normal_direction[2] - f7 = equations.c_h * psi * normal_direction[2] + (v1 * B2 - v2 * B1) * normal_direction[1] - f8 = v_normal * B3 - v3 * B_normal - f9 = equations.c_h * B_normal - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux(u, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] + rho_v_normal = rho * v_normal + + f1 = rho_v_normal + f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] + f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] + f4 = rho_v_normal * v3 - B3 * B_normal + f5 = ((kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v_normal + - + B_normal * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B_normal) + f6 = equations.c_h * psi * normal_direction[1] + + (v2 * B1 - v1 * B2) * normal_direction[2] + f7 = equations.c_h * psi * normal_direction[2] + + (v1 * B2 - v2 * B1) * normal_direction[1] + f8 = v_normal * B3 - v3 * B_normal + f9 = equations.c_h * B_normal + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - - """ flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) @@ -203,78 +208,77 @@ terms. 
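+Note that this term is genuinely nonconservative: it couples the local state
+`u_ll` with the magnetic field (and `psi`) of `u_rr`, so it is not symmetric
+in its two arguments and is evaluated with the local state passed as `u_ll`.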
""" @inline function flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) - if orientation == 1 - f = SVector(0, - B1_ll * B1_rr, - B2_ll * B1_rr, - B3_ll * B1_rr, - v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, - v1_ll * B1_rr, - v2_ll * B1_rr, - v3_ll * B1_rr, - v1_ll * psi_rr) - else # orientation == 2 - f = SVector(0, - B1_ll * B2_rr, - B2_ll * B2_rr, - B3_ll * B2_rr, - v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, - v1_ll * B2_rr, - v2_ll * B2_rr, - v3_ll * B2_rr, - v2_ll * psi_rr) - end - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) + if orientation == 1 + f = SVector(0, + B1_ll * B1_rr, + B2_ll * B1_rr, + B3_ll * B1_rr, + v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, + v1_ll * B1_rr, + v2_ll * B1_rr, + v3_ll * B1_rr, + v1_ll * psi_rr) + else # orientation == 2 + f = SVector(0, + B1_ll * B2_rr, + B2_ll * B2_rr, + B3_ll * B2_rr, + v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, + v1_ll * B2_rr, + v2_ll * B2_rr, + v3_ll * B2_rr, + v2_ll * psi_rr) + end + + return f end @inline function flux_nonconservative_powell(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector - # at the same node location) while `B_dot_n_rr` uses the averaged normal - # direction. The reason for this is that `v_dot_n_ll` depends only on the left - # state and multiplies some gradient while `B_dot_n_rr` is used to compute - # the divergence of B. 
- v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] - B_dot_n_rr = B1_rr * normal_direction_average[1] + B2_rr * normal_direction_average[2] - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) - f = SVector(0, - B1_ll * B_dot_n_rr, - B2_ll * B_dot_n_rr, - B3_ll * B_dot_n_rr, - v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, - v1_ll * B_dot_n_rr, - v2_ll * B_dot_n_rr, - v3_ll * B_dot_n_rr, - v_dot_n_ll * psi_rr) - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector + # at the same node location) while `B_dot_n_rr` uses the averaged normal + # direction. The reason for this is that `v_dot_n_ll` depends only on the left + # state and multiplies some gradient while `B_dot_n_rr` is used to compute + # the divergence of B. + v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] + B_dot_n_rr = B1_rr * normal_direction_average[1] + + B2_rr * normal_direction_average[2] + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) + f = SVector(0, + B1_ll * B_dot_n_rr, + B2_ll * B_dot_n_rr, + B3_ll * B_dot_n_rr, + v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, + v1_ll * B_dot_n_rr, + v2_ll * B_dot_n_rr, + v3_ll * B_dot_n_rr, + v_dot_n_ll * psi_rr) + + return f end - - """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations2D) @@ -284,83 +288,89 @@ Entropy conserving two-point flux by divergence diminishing ideal magnetohydrodynamics equations [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - p_ll = (equations.gamma - 1)*(rho_e_ll - 0.5*rho_ll*vel_norm_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - p_rr = (equations.gamma - 1)*(rho_e_rr - 0.5*rho_rr*vel_norm_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - beta_ll = 0.5*rho_ll/p_ll - beta_rr = 0.5*rho_rr/p_rr - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - rho_avg = 0.5*(rho_ll+rho_rr) - rho_mean = ln_mean(rho_ll,rho_rr) - beta_mean = ln_mean(beta_ll,beta_rr) - beta_avg = 0.5*(beta_ll+beta_rr) - v1_avg = 0.5*(v1_ll+v1_rr) - v2_avg = 0.5*(v2_ll+v2_rr) - v3_avg = 0.5*(v3_ll+v3_rr) - p_mean = 0.5*rho_avg/beta_avg - B1_avg = 
0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - psi_avg = 0.5*(psi_ll+psi_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean*v1_avg - f2 = f1*v1_avg + p_mean + 0.5*mag_norm_avg - B1_avg*B1_avg - f3 = f1*v2_avg - B1_avg*B2_avg - f4 = f1*v3_avg - B1_avg*B3_avg - f6 = equations.c_h*psi_avg - f7 = v1_avg*B2_avg - v2_avg*B1_avg - f8 = v1_avg*B3_avg - v3_avg*B1_avg - f9 = equations.c_h*B1_avg - # total energy flux is complicated and involves the previous eight components - psi_B1_avg = 0.5*(B1_ll*psi_ll + B1_rr*psi_rr) - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v1_mag_avg + - B1_avg*vel_dot_mag_avg - equations.c_h*psi_B1_avg) - else - f1 = rho_mean*v2_avg - f2 = f1*v1_avg - B1_avg*B2_avg - f3 = f1*v2_avg + p_mean + 0.5*mag_norm_avg - B2_avg*B2_avg - f4 = f1*v3_avg - B2_avg*B3_avg - f6 = v2_avg*B1_avg - v1_avg*B2_avg - f7 = equations.c_h*psi_avg - f8 = v2_avg*B3_avg - v3_avg*B2_avg - f9 = equations.c_h*B2_avg - # total energy flux is complicated and involves the previous eight components - psi_B2_avg = 0.5*(B2_ll*psi_ll + B2_rr*psi_rr) - v2_mag_avg = 0.5*(v2_ll*mag_norm_ll + v2_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v2_mag_avg + - B2_avg*vel_dot_mag_avg - equations.c_h*psi_B2_avg) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + p_ll = (equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * vel_norm_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2) + p_rr = (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * vel_norm_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * 
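+    # Note: beta = 0.5 * rho / p acts as an inverse temperature; using the
+    # logarithmic means rho_mean and beta_mean here, rather than arithmetic
+    # means, is what makes this two-point flux entropy conservative.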
(mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + 0.5 * mag_norm_avg - B1_avg * B1_avg + f3 = f1 * v2_avg - B1_avg * B2_avg + f4 = f1 * v3_avg - B1_avg * B3_avg + f6 = equations.c_h * psi_avg + f7 = v1_avg * B2_avg - v2_avg * B1_avg + f8 = v1_avg * B3_avg - v3_avg * B1_avg + f9 = equations.c_h * B1_avg + # total energy flux is complicated and involves the previous eight components + psi_B1_avg = 0.5 * (B1_ll * psi_ll + B1_rr * psi_rr) + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg - equations.c_h * psi_B1_avg) + else + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - B1_avg * B2_avg + f3 = f1 * v2_avg + p_mean + 0.5 * mag_norm_avg - B2_avg * B2_avg + f4 = f1 * v3_avg - B2_avg * B3_avg + f6 = v2_avg * B1_avg - v1_avg * B2_avg + f7 = equations.c_h * psi_avg + f8 = v2_avg * B3_avg - v3_avg * B2_avg + f9 = equations.c_h * B2_avg + # total energy flux is complicated and involves the previous eight components + psi_B2_avg = 0.5 * (B2_ll * psi_ll + B2_rr * psi_rr) + v2_mag_avg = 0.5 * (v2_ll * mag_norm_ll + v2_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v2_mag_avg + + B2_avg * vel_dot_mag_avg - equations.c_h * psi_B2_avg) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations2D) @@ -383,172 +393,198 @@ Hindenlang and Gassner (2019), extending [`flux_ranocha`](@ref) to the MHD equat the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = equations.c_h 
* psi_avg - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) - + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll) ) ) - else # orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) - f3 = f1 * v2_avg + p_avg + magnetic_square_avg - 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) - #f5 below - f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) - f7 = equations.c_h * psi_avg - f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) - f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v2_rr + p_rr * v2_ll - + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) - + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) - - (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) - - (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) - + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll) ) ) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = equations.c_h * psi_avg + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll 
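+    # Note: each paired product a_ll * b_ll * b_rr + a_rr * b_rr * b_ll is
+    # symmetric under exchanging the left and right states; together with
+    # consistency, this symmetry is a prerequisite for entropy conservation.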
* B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) + + + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll))) + else # orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) + f3 = f1 * v2_avg + p_avg + magnetic_square_avg - + 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) + #f5 below + f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) + f7 = equations.c_h * psi_avg + f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) + f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v2_rr + p_rr * v2_ll + + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) + + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) + - + (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) + - + (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) + + + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll))) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end @inline function flux_hindenlang_gassner(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] - B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = ( f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] - - 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll) ) - f3 = ( f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] - - 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll) ) - f4 = ( f1 * v3_avg - - 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll) ) - #f5 below - f6 = ( equations.c_h * psi_avg * normal_direction[1] - + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + - v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr) ) - f7 = ( equations.c_h * psi_avg * normal_direction[2] - + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + - v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr) ) - f8 = + 0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + - v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr) - f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) - # total 
energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v_dot_n_rr + p_rr * v_dot_n_ll - + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) - + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) - + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) - - (v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) - - (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) - - (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) - + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll) ) ) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] + B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = (f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] + - + 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll)) + f3 = (f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] + - + 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll)) + f4 = (f1 * v3_avg + - + 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll)) + #f5 below + f6 = (equations.c_h * psi_avg * normal_direction[1] + + + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + + v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr)) + f7 = (equations.c_h * psi_avg * normal_direction[2] + + + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + + v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr)) + f8 = +0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + + v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr) + f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v_dot_n_rr + p_rr * v_dot_n_ll + + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) + + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) + + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) + - + (v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) + - + (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) + - + (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) + + + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll))) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - # 
Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr - - # Calculate the left/right velocities and fast magnetoacoustic wave speeds - if orientation == 1 - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - else # orientation == 2 - v_ll = rho_v2_ll / rho_ll - v_rr = rho_v2_rr / rho_rr - end - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + + # Calculate the left/right velocities and fast magnetoacoustic wave speeds + if orientation == 1 + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + else # orientation == 2 + v_ll = rho_v2_ll / rho_ll + v_rr = rho_v2_rr / rho_rr + end + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - # return max(v_mag_ll, v_mag_rr) + max(cf_ll, cf_rr) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr - - # Calculate normal velocities and fast magnetoacoustic wave speeds - # left - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v_ll = ( v1_ll * normal_direction[1] - + v2_ll * normal_direction[2] ) - cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) - # right - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v_rr = ( v1_rr * normal_direction[1] - + v2_rr * normal_direction[2] ) - cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) - - # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref) - return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + # return max(v_mag_ll, v_mag_rr) + max(cf_ll, cf_rr) + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + + # Calculate normal velocities and fast magnetoacoustic wave speeds + # left + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v_ll = (v1_ll * normal_direction[1] + + + v2_ll * normal_direction[2]) + cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + # right + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v_rr = (v1_rr * normal_direction[1] + + + v2_rr * normal_direction[2]) + cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + + # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref) + return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end - """ min_max_speed_naive(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations2D) @@ -557,295 +593,298 @@ Calculate minimum and maximum wave speeds for HLL-type fluxes as in An HLLC Riemann solver for magneto-hydrodynamics [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020) """ -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, 
rho_v2_rr, _ = u_rr - - # Calculate primitive velocity variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - - # Approximate the left-most and right-most eigenvalues in the Riemann fan - if orientation == 1 # x-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) - else # y-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) - end - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + + # Calculate primitive velocity variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + + # Approximate the left-most and right-most eigenvalues in the Riemann fan + if orientation == 1 # x-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) + else # y-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr - # Calculate primitive velocity variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll + # Calculate primitive velocity variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr - v_normal_ll = (v1_ll * normal_direction[1] + - v2_ll * normal_direction[2]) - v_normal_rr = (v1_rr * normal_direction[1] + - v2_rr * normal_direction[2]) + v_normal_ll = (v1_ll * normal_direction[1] + + v2_ll * normal_direction[2]) + v_normal_rr = (v1_rr * normal_direction[1] + + v2_rr * normal_direction[2]) - c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) - c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) - v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations) + c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations) - # Estimate the min/max eigenvalues in the normal direction - λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe) - λ_max = 
max(v_normal_rr + c_f_rr, v_roe + c_f_roe) + # Estimate the min/max eigenvalues in the normal direction + λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe) + λ_max = max(v_normal_rr + c_f_rr, v_roe + c_f_roe) - return λ_min, λ_max + return λ_min, λ_max end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this rotation of the state vector @inline function rotate_to_x(u, normal_vector, equations::IdealGlmMhdEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). - c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0 0 0 0 0 0; - # 0 n_1 n_2 0 0 0 0 0 0; - # 0 t_1 t_2 0 0 0 0 0 0; - # 0 0 0 1 0 0 0 0 0; - # 0 0 0 0 1 0 0 0 0; - # 0 0 0 0 0 n_1 n_2 0 0; - # 0 0 0 0 0 t_1 t_2 0 0; - # 0 0 0 0 0 0 0 1 0; - # 0 0 0 0 0 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1. - # Note for IdealGlmMhdEquations2D only the velocities and magnetic field variables rotate - - return SVector(u[1], - c * u[2] + s * u[3], - -s * u[2] + c * u[3], - u[4], - u[5], - c * u[6] + s * u[7], - -s * u[6] + c * u[7], - u[8], - u[9]) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). + c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0 0 0 0 0 0; + # 0 n_1 n_2 0 0 0 0 0 0; + # 0 t_1 t_2 0 0 0 0 0 0; + # 0 0 0 1 0 0 0 0 0; + # 0 0 0 0 1 0 0 0 0; + # 0 0 0 0 0 n_1 n_2 0 0; + # 0 0 0 0 0 t_1 t_2 0 0; + # 0 0 0 0 0 0 0 1 0; + # 0 0 0 0 0 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1. + # Note for IdealGlmMhdEquations2D only the velocities and magnetic field variables rotate + + return SVector(u[1], + c * u[2] + s * u[3], + -s * u[2] + c * u[3], + u[4], + u[5], + c * u[6] + s * u[7], + -s * u[6] + c * u[7], + u[8], + u[9]) end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this back-rotation of the state vector @inline function rotate_from_x(u, normal_vector, equations::IdealGlmMhdEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). - c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D back-rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0 0 0 0 0 0; - # 0 n_1 t_1 0 0 0 0 0 0; - # 0 n_2 t_2 0 0 0 0 0 0; - # 0 0 0 1 0 0 0 0 0; - # 0 0 0 0 1 0 0 0 0; - # 0 0 0 0 0 n_1 t_1 0 0; - # 0 0 0 0 0 n_2 t_2 0 0; - # 0 0 0 0 0 0 0 1 0; - # 0 0 0 0 0 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1. - # Note for IdealGlmMhdEquations2D the velocities and magnetic field variables back-rotate - - return SVector(u[1], - c * u[2] - s * u[3], - s * u[2] + c * u[3], - u[4], - u[5], - c * u[6] - s * u[7], - s * u[6] + c * u[7], - u[8], - u[9]) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). 
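+    # This is the transpose, and hence the inverse, of the rotation applied in
+    # `rotate_to_x`, so rotate_from_x(rotate_to_x(u, n), n) recovers u for any
+    # unit vector n.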
+ c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D back-rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0 0 0 0 0 0; + # 0 n_1 t_1 0 0 0 0 0 0; + # 0 n_2 t_2 0 0 0 0 0 0; + # 0 0 0 1 0 0 0 0 0; + # 0 0 0 0 1 0 0 0 0; + # 0 0 0 0 0 n_1 t_1 0 0; + # 0 0 0 0 0 n_2 t_2 0 0; + # 0 0 0 0 0 0 0 1 0; + # 0 0 0 0 0 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1. + # Note for IdealGlmMhdEquations2D the velocities and magnetic field variables back-rotate + + return SVector(u[1], + c * u[2] - s * u[3], + s * u[2] + c * u[3], + u[4], + u[5], + c * u[6] - s * u[7], + s * u[6] + c * u[7], + u[8], + u[9]) end - - @inline function max_abs_speeds(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, _ = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) - cf_y_direction = calc_fast_wavespeed(u, 2, equations) - - return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction + rho, rho_v1, rho_v2, rho_v3, _ = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + cf_x_direction = calc_fast_wavespeed(u, 1, equations) + cf_y_direction = calc_fast_wavespeed(u, 2, equations) + + return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 - + B1 * B1 + B2 * B2 + B3 * B3 - + psi * psi)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * (rho_e - + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 + + B1 * B1 + B2 * B2 + B3 * B3 + + psi * psi)) - return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi) + return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi) end - # Convert conservative variables to entropy variables @inline function cons2entropy(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = rho_p * v3 - w5 = -rho_p - w6 = rho_p * B1 - w7 = rho_p * B2 - w8 = rho_p * B3 - w9 = rho_p * psi - - return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p + + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = rho_p * v3 + w5 = -rho_p + w6 = rho_p * B1 + w7 = rho_p * B2 + w8 = rho_p * B3 + w9 = rho_p * psi + + return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) end # Convert entropy variables to conservative variables @inline function entropy2cons(w, equations::IdealGlmMhdEquations2D) - w1, w2, w3, w4, w5, w6, w7, w8, w9 = w - - v1 = - w2 / w5 - v2 = - w3 / w5 - v3 = - w4 / w5 - - B1 = - w6 / w5 - B2 = - 
w7 / w5 - B3 = - w8 / w5 - psi = - w9 / w5 - - # This imitates what is done for compressible Euler 3D `entropy2cons`: we convert from - # the entropy variables for `-rho * s / (gamma - 1)` to the entropy variables for the entropy - # `-rho * s` used by Hughes, Franca, Mallet (1986). - @unpack gamma = equations - V1, V2, V3, V4, V5 = SVector(w1, w2, w3, w4, w5) * (gamma - 1) - s = gamma - V1 + (V2^2 + V3^2 + V4^2)/(2*V5) - rho_iota = ((gamma-1) / (-V5)^gamma)^(equations.inv_gamma_minus_one)*exp(-s * equations.inv_gamma_minus_one) - rho = -rho_iota * V5 - p = -rho / w5 - - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) + w1, w2, w3, w4, w5, w6, w7, w8, w9 = w + + v1 = -w2 / w5 + v2 = -w3 / w5 + v3 = -w4 / w5 + + B1 = -w6 / w5 + B2 = -w7 / w5 + B3 = -w8 / w5 + psi = -w9 / w5 + + # This imitates what is done for compressible Euler 3D `entropy2cons`: we convert from + # the entropy variables for `-rho * s / (gamma - 1)` to the entropy variables for the entropy + # `-rho * s` used by Hughes, Franca, Mallet (1986). + @unpack gamma = equations + V1, V2, V3, V4, V5 = SVector(w1, w2, w3, w4, w5) * (gamma - 1) + s = gamma - V1 + (V2^2 + V3^2 + V4^2) / (2 * V5) + rho_iota = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * + exp(-s * equations.inv_gamma_minus_one) + rho = -rho_iota * V5 + p = -rho / w5 + + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdEquations2D) - rho, v1, v2, v3, p, B1, B2, B3, psi = prim + rho, v1, v2, v3, p, B1, B2, B3, psi = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p* equations.inv_gamma_minus_one + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p * equations.inv_gamma_minus_one + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - @inline function density(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - return rho + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + return rho end @inline function pressure(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return p end @inline function density_pressure(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return rho * p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, orientation::Integer, equations::IdealGlmMhdEquations2D) - 
rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - if orientation == 1 # x-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - else - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b2^2)) - end - return c_f +@inline function calc_fast_wavespeed(cons, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + if orientation == 1 # x-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + else + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b2^2)) + end + return c_f end -@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2]) - b_dot_n_squared = (b1 * normal_direction[1] + - b2 * normal_direction[2])^2 / norm_squared - - c_f = sqrt( - (0.5 * (a_square + b_square) + - 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * norm_squared) - return c_f +@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + norm_squared = (normal_direction[1] * normal_direction[1] + + normal_direction[2] * normal_direction[2]) + b_dot_n_squared = (b1 * normal_direction[1] + + b2 * normal_direction[2])^2 / norm_squared + + c_f = sqrt((0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * + norm_squared) + return c_f end - """ calc_fast_wavespeed_roe(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations2D) @@ -856,193 +895,202 @@ as 
given by of Roe Matrices for Systems of Conservation Laws [DOI: 10.1006/jcph.1997.5773](https://doi.org/10.1006/jcph.1997.5773) """ -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) - mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll - p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - # finally compute the average wave speed and set the output velocity (depends on orientation) - if orientation == 1 # x-direction - c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v1_roe - else # y-direction - c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v2_roe - end - - return vel_out_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + # Calculate primitive variables + v1_ll = 
rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+    kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll)
+    mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll
+    p_ll = (equations.gamma - 1) *
+           (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2)
+
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+    v3_rr = rho_v3_rr / rho_rr
+    kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr)
+    mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr
+    p_rr = (equations.gamma - 1) *
+           (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2)
+
+    # compute total pressure which is thermal + magnetic pressures
+    p_total_ll = p_ll + 0.5 * mag_norm_ll
+    p_total_rr = p_rr + 0.5 * mag_norm_rr
+
+    # compute the Roe density averages
+    sqrt_rho_ll = sqrt(rho_ll)
+    sqrt_rho_rr = sqrt(rho_rr)
+    inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr)
+    inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr)
+    rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add
+    rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add
+    # Roe averages
+    # velocities and magnetic fields
+    v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe
+    v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe
+    v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe
+    B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe
+    B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe
+    B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+    # finally compute the average wave speed and set the output velocity (depends on orientation)
+    if orientation == 1 # x-direction
+        c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+        a_star_roe = sqrt((a_square_roe + b_square_roe)^2 -
+                          4.0 * a_square_roe * c_a_roe)
+        c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+        vel_out_roe = v1_roe
+    else # y-direction
+        c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+        a_star_roe = sqrt((a_square_roe + b_square_roe)^2 -
+                          4.0 * a_square_roe * c_a_roe)
+        c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+        vel_out_roe = v2_roe
+    end
+
+    return vel_out_roe, c_f_roe
 end

-@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D)
-  rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
-  rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
-
-  # Calculate primitive variables
-  v1_ll = rho_v1_ll / rho_ll
-  v2_ll = rho_v2_ll / rho_ll
-  v3_ll = rho_v3_ll / rho_ll
-  kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll)
-  mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll
-  p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2)
-
-  v1_rr = rho_v1_rr / rho_rr
-  v2_rr = rho_v2_rr / rho_rr
-  v3_rr = rho_v3_rr / rho_rr
-  kin_en_rr = 0.5 * (rho_v1_rr * v1_rr
+ rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - - # finally compute the average wave speed and set the output velocity (depends on orientation) - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2]) - B_roe_dot_n_squared = (B1_roe * normal_direction[1] + - B2_roe * normal_direction[2])^2 / norm_squared - - c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe) - c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared) - vel_out_roe = (v1_roe * normal_direction[1] + - v2_roe * normal_direction[2]) - - return vel_out_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) + mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll + p_ll = (equations.gamma - 1) * + (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) + mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr + p_rr = (equations.gamma - 1) * + (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2) + + # compute total pressure which is thermal + magnetic pressures + p_total_ll = p_ll + 0.5 * mag_norm_ll + p_total_rr = p_rr + 0.5 * mag_norm_rr + + # compute the Roe density averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sqrt_rho_add = 
1.0 / (sqrt_rho_ll + sqrt_rho_rr)
+    inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr)
+    rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add
+    rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add
+    # Roe averages
+    # velocities and magnetic fields
+    v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe
+    v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe
+    v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe
+    B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe
+    B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe
+    B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+
+    # finally compute the average wave speed and set the output velocity (depends on orientation)
+    norm_squared = (normal_direction[1] * normal_direction[1] +
+                    normal_direction[2] * normal_direction[2])
+    B_roe_dot_n_squared = (B1_roe * normal_direction[1] +
+                           B2_roe * normal_direction[2])^2 / norm_squared
+
+    c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+    a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe)
+    c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared)
+    vel_out_roe = (v1_roe * normal_direction[1] +
+                   v2_roe * normal_direction[2])
+
+    return vel_out_roe, c_f_roe
 end

-
 # Calculate thermodynamic entropy for a conservative state `cons`
 @inline function entropy_thermodynamic(cons, equations::IdealGlmMhdEquations2D)
-  # Pressure
-  p = (equations.gamma - 1) * (cons[5] - 1/2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
-                                       - 1/2 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
-                                       - 1/2 * cons[9]^2)
-
-  # Thermodynamic entropy
-  s = log(p) - equations.gamma*log(cons[1])
-
-  return s
+    # Pressure
+    p = (equations.gamma - 1) *
+        (cons[5] - 1 / 2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
+         -
+         1 / 2 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+         -
+         1 / 2 * cons[9]^2)
+
+    # Thermodynamic entropy
+    s = log(p) - equations.gamma * log(cons[1])
+
+    return s
 end

-
 # Calculate mathematical entropy for a conservative state `cons`
 @inline function entropy_math(cons, equations::IdealGlmMhdEquations2D)
-  S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one
+    S = -entropy_thermodynamic(cons, equations) * cons[1] *
+        equations.inv_gamma_minus_one

-  return S
+    return S
 end

-
 # Default entropy is the mathematical entropy
 @inline entropy(cons, equations::IdealGlmMhdEquations2D) = entropy_math(cons, equations)

-
 # Calculate total energy for a conservative state `cons`
 @inline energy_total(cons, ::IdealGlmMhdEquations2D) = cons[5]

-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(cons, equations::IdealGlmMhdEquations2D)
-  return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2)/cons[1]
+    return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
 end

-
 # Calculate the magnetic energy for a conservative state `cons`.
 # OBS! For non-dimensional form of the ideal MHD magnetic pressure ≡ magnetic energy
 @inline function energy_magnetic(cons, ::IdealGlmMhdEquations2D)
-  return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+    return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
 end

-
 # Calculate internal energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::IdealGlmMhdEquations2D)
-  return (energy_total(cons, equations)
-          - energy_kinetic(cons, equations)
-          - energy_magnetic(cons, equations)
-          - cons[9]^2 / 2)
+    return (energy_total(cons, equations)
+            -
+            energy_kinetic(cons, equations)
+            -
+            energy_magnetic(cons, equations)
+            -
+            cons[9]^2 / 2)
 end

-
 # Calculate the cross helicity (\vec{v}⋅\vec{B}) for a conservative state `cons`
 @inline function cross_helicity(cons, ::IdealGlmMhdEquations2D)
-  return (cons[2]*cons[6] + cons[3]*cons[7] + cons[4]*cons[8]) / cons[1]
+    return (cons[2] * cons[6] + cons[3] * cons[7] + cons[4] * cons[8]) / cons[1]
 end

-
-
 end # @muladd
diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl
index 647e6c4f88a..401fcd2daf1 100644
--- a/src/equations/ideal_glm_mhd_3d.jl
+++ b/src/equations/ideal_glm_mhd_3d.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 @doc raw"""
     IdealGlmMhdEquations3D(gamma)
@@ -11,28 +11,33 @@ The ideal compressible GLM-MHD equations for an ideal gas with ratio of
 specific heats `gamma` in three space dimensions.
 """
-mutable struct IdealGlmMhdEquations3D{RealT<:Real} <: AbstractIdealGlmMhdEquations{3, 9}
-  gamma::RealT               # ratio of specific heats
-  inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
-  c_h::RealT                 # GLM cleaning speed
-
-  function IdealGlmMhdEquations3D(gamma, c_h)
-    γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
-    new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
-  end
+mutable struct IdealGlmMhdEquations3D{RealT <: Real} <:
+               AbstractIdealGlmMhdEquations{3, 9}
+    gamma::RealT # ratio of specific heats
+    inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
+    c_h::RealT # GLM cleaning speed
+
+    function IdealGlmMhdEquations3D(gamma, c_h)
+        γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
+        new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
+    end
 end

-function IdealGlmMhdEquations3D(gamma; initial_c_h=convert(typeof(gamma), NaN))
-  # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
-  IdealGlmMhdEquations3D(promote(gamma, initial_c_h)...)
+function IdealGlmMhdEquations3D(gamma; initial_c_h = convert(typeof(gamma), NaN))
+    # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
+    IdealGlmMhdEquations3D(promote(gamma, initial_c_h)...)
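+
+    # Note that `initial_c_h = NaN` leaves the GLM cleaning speed deliberately
+    # unset here; a concrete value for `c_h` is assigned at runtime, e.g. by
+    # Trixi's `GlmSpeedCallback`.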
end - have_nonconservative_terms(::IdealGlmMhdEquations3D) = True() -varnames(::typeof(cons2cons), ::IdealGlmMhdEquations3D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") -varnames(::typeof(cons2prim), ::IdealGlmMhdEquations3D) = ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") -default_analysis_integrals(::IdealGlmMhdEquations3D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function varnames(::typeof(cons2cons), ::IdealGlmMhdEquations3D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") +end +function varnames(::typeof(cons2prim), ::IdealGlmMhdEquations3D) + ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") +end +function default_analysis_integrals(::IdealGlmMhdEquations3D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end # Set initial conditions at physical location `x` for time `t` """ @@ -41,53 +46,51 @@ initial_condition_constant(x, t, equations::IdealGlmMhdEquations3D) A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::IdealGlmMhdEquations3D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = -0.5 - rho_e = 50.0 - B1 = 3.0 - B2 = -1.2 - B3 = 0.5 - psi = 0.0 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = -0.5 + rho_e = 50.0 + B1 = 3.0 + B2 = -1.2 + B3 = 0.5 + psi = 0.0 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations3D) An Alfvén wave as smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations3D) - # Alfvén wave in three space dimensions - # Altmann thesis http://dx.doi.org/10.18419/opus-3895 - # domain must be set to [-1, 1]^3, γ = 5/3 - p = 1 - omega = 2*pi # may be multiplied by frequency - # r: length-variable = length of computational domain - r = 2 - # e: epsilon = 0.2 - e = 0.2 - nx = 1 / sqrt(r^2 + 1) - ny = r / sqrt(r^2 + 1) - sqr = 1 - Va = omega / (ny * sqr) - phi_alv = omega / ny * (nx * (x[1] - 0.5*r) + ny * (x[2] - 0.5*r)) - Va * t - - rho = 1. 
- v1 = -e*ny*cos(phi_alv) / rho - v2 = e*nx*cos(phi_alv) / rho - v3 = e * sin(phi_alv) / rho - B1 = nx -rho*v1*sqr - B2 = ny -rho*v2*sqr - B3 = -rho*v3*sqr - psi = 0 - - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) + # Alfvén wave in three space dimensions + # Altmann thesis http://dx.doi.org/10.18419/opus-3895 + # domain must be set to [-1, 1]^3, γ = 5/3 + p = 1 + omega = 2 * pi # may be multiplied by frequency + # r: length-variable = length of computational domain + r = 2 + # e: epsilon = 0.2 + e = 0.2 + nx = 1 / sqrt(r^2 + 1) + ny = r / sqrt(r^2 + 1) + sqr = 1 + Va = omega / (ny * sqr) + phi_alv = omega / ny * (nx * (x[1] - 0.5 * r) + ny * (x[2] - 0.5 * r)) - Va * t + + rho = 1.0 + v1 = -e * ny * cos(phi_alv) / rho + v2 = e * nx * cos(phi_alv) / rho + v3 = e * sin(phi_alv) / rho + B1 = nx - rho * v1 * sqr + B2 = ny - rho * v2 * sqr + B3 = -rho * v3 * sqr + psi = 0 + + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) end - """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations3D) @@ -97,115 +100,118 @@ A weak blast wave adapted from [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations3D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = (0, 0, 0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - z_norm = x[3] - inicenter[3] - r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) - phi = atan(y_norm, x_norm) - theta = iszero(r) ? 0.0 : acos(z_norm / r) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) - v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) - v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, v3, p, 1.0, 1.0, 1.0, 0.0), equations) + # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Same discontinuity in the velocities but with magnetic fields + # Set up polar coordinates + inicenter = (0, 0, 0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + z_norm = x[3] - inicenter[3] + r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) + phi = atan(y_norm, x_norm) + theta = iszero(r) ? 0.0 : acos(z_norm / r) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) + v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) + v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) + p = r > 0.5 ? 
1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, v3, p, 1.0, 1.0, 1.0, 0.0), equations) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::IdealGlmMhdEquations3D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1*v1 + p + mag_en - B1^2 - f3 = rho_v1*v2 - B1*B2 - f4 = rho_v1*v3 - B1*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B1 - f6 = equations.c_h*psi - f7 = v1*B2 - v2*B1 - f8 = v1*B3 - v3*B1 - f9 = equations.c_h*B1 - elseif orientation == 2 - f1 = rho_v2 - f2 = rho_v2*v1 - B2*B1 - f3 = rho_v2*v2 + p + mag_en - B2^2 - f4 = rho_v2*v3 - B2*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v2 - B2*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B2 - f6 = v2*B1 - v1*B2 - f7 = equations.c_h*psi - f8 = v2*B3 - v3*B2 - f9 = equations.c_h*B2 - else - f1 = rho_v3 - f2 = rho_v3*v1 - B3*B1 - f3 = rho_v3*v2 - B3*B2 - f4 = rho_v3*v3 + p + mag_en - B3^2 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v3 - B3*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B3 - f6 = v3*B1 - v1*B3 - f7 = v3*B2 - v2*B3 - f8 = equations.c_h*psi - f9 = equations.c_h*B3 - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + mag_en - B1^2 + f3 = rho_v1 * v2 - B1 * B2 + f4 = rho_v1 * v3 - B1 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v1 - + B1 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B1 + f6 = equations.c_h * psi + f7 = v1 * B2 - v2 * B1 + f8 = v1 * B3 - v3 * B1 + f9 = equations.c_h * B1 + elseif orientation == 2 + f1 = rho_v2 + f2 = rho_v2 * v1 - B2 * B1 + f3 = rho_v2 * v2 + p + mag_en - B2^2 + f4 = rho_v2 * v3 - B2 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v2 - + B2 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B2 + f6 = v2 * B1 - v1 * B2 + f7 = equations.c_h * psi + f8 = v2 * B3 - v3 * B2 + f9 = equations.c_h * B2 + else + f1 = rho_v3 + f2 = rho_v3 * v1 - B3 * B1 + f3 = rho_v3 * v2 - B3 * B2 + f4 = rho_v3 * v3 + p + mag_en - B3^2 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v3 - + B3 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B3 + f6 = v3 * B1 - v1 * B3 + f7 = v3 * B2 - v2 * B3 + f8 = equations.c_h * psi + f9 = equations.c_h * B3 + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, 
B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + v3 * normal_direction[3] - B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] + B3 * normal_direction[3] - rho_v_normal = rho * v_normal - - f1 = rho_v_normal - f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] - f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] - f4 = rho_v_normal * v3 - B3 * B_normal + (p + mag_en) * normal_direction[3] - f5 = ( (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en) * v_normal - - B_normal * (v1*B1 + v2*B2 + v3*B3) + equations.c_h * psi * B_normal ) - f6 = ( equations.c_h * psi * normal_direction[1] + - (v2 * B1 - v1 * B2) * normal_direction[2] + - (v3 * B1 - v1 * B3) * normal_direction[3] ) - f7 = ( (v1 * B2 - v2 * B1) * normal_direction[1] + - equations.c_h * psi * normal_direction[2] + - (v3 * B2 - v2 * B3) * normal_direction[3] ) - f8 = ( (v1 * B3 - v3 * B1) * normal_direction[1] + - (v2 * B3 - v3 * B2) * normal_direction[2] + - equations.c_h * psi * normal_direction[3] ) - f9 = equations.c_h * B_normal - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux(u, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + + v3 * normal_direction[3] + B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] + + B3 * normal_direction[3] + rho_v_normal = rho * v_normal + + f1 = rho_v_normal + f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] + f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] + f4 = rho_v_normal * v3 - B3 * B_normal + (p + mag_en) * normal_direction[3] + f5 = ((kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v_normal + - + B_normal * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B_normal) + f6 = (equations.c_h * psi * normal_direction[1] + + (v2 * B1 - v1 * B2) * normal_direction[2] + + (v3 * B1 - v1 * B3) * normal_direction[3]) + f7 = ((v1 * B2 - v2 * B1) * normal_direction[1] + + equations.c_h * psi * normal_direction[2] + + (v3 * B2 - v2 * B3) * normal_direction[3]) + f8 = ((v1 * B3 - v3 * B1) * normal_direction[1] + + (v2 * B3 - v3 * B2) * normal_direction[2] + + equations.c_h * psi * normal_direction[3]) + f9 = equations.c_h * B_normal + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - - """ flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) @@ -232,88 +238,89 @@ terms. 
""" @inline function flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) - if orientation == 1 - f = SVector(0, - B1_ll * B1_rr, - B2_ll * B1_rr, - B3_ll * B1_rr, - v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, - v1_ll * B1_rr, - v2_ll * B1_rr, - v3_ll * B1_rr, - v1_ll * psi_rr) - elseif orientation == 2 - f = SVector(0, - B1_ll * B2_rr, - B2_ll * B2_rr, - B3_ll * B2_rr, - v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, - v1_ll * B2_rr, - v2_ll * B2_rr, - v3_ll * B2_rr, - v2_ll * psi_rr) - else # orientation == 3 - f = SVector(0, - B1_ll * B3_rr, - B2_ll * B3_rr, - B3_ll * B3_rr, - v_dot_B_ll * B3_rr + v3_ll * psi_ll * psi_rr, - v1_ll * B3_rr, - v2_ll * B3_rr, - v3_ll * B3_rr, - v3_ll * psi_rr) - end - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) + if orientation == 1 + f = SVector(0, + B1_ll * B1_rr, + B2_ll * B1_rr, + B3_ll * B1_rr, + v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, + v1_ll * B1_rr, + v2_ll * B1_rr, + v3_ll * B1_rr, + v1_ll * psi_rr) + elseif orientation == 2 + f = SVector(0, + B1_ll * B2_rr, + B2_ll * B2_rr, + B3_ll * B2_rr, + v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, + v1_ll * B2_rr, + v2_ll * B2_rr, + v3_ll * B2_rr, + v2_ll * psi_rr) + else # orientation == 3 + f = SVector(0, + B1_ll * B3_rr, + B2_ll * B3_rr, + B3_ll * B3_rr, + v_dot_B_ll * B3_rr + v3_ll * psi_ll * psi_rr, + v1_ll * B3_rr, + v2_ll * B3_rr, + v3_ll * B3_rr, + v3_ll * psi_rr) + end + + return f end @inline function flux_nonconservative_powell(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector - # at the same node location) while `B_dot_n_rr` uses the averaged normal - # direction. The reason for this is that `v_dot_n_ll` depends only on the left - # state and multiplies some gradient while `B_dot_n_rr` is used to compute - # the divergence of B. 
- v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] + v3_ll * normal_direction_ll[3] - B_dot_n_rr = B1_rr * normal_direction_average[1] + B2_rr * normal_direction_average[2] + B3_rr * normal_direction_average[3] - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) - f = SVector(0, - B1_ll * B_dot_n_rr, - B2_ll * B_dot_n_rr, - B3_ll * B_dot_n_rr, - v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, - v1_ll * B_dot_n_rr, - v2_ll * B_dot_n_rr, - v3_ll * B_dot_n_rr, - v_dot_n_ll * psi_rr) - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector + # at the same node location) while `B_dot_n_rr` uses the averaged normal + # direction. The reason for this is that `v_dot_n_ll` depends only on the left + # state and multiplies some gradient while `B_dot_n_rr` is used to compute + # the divergence of B. + v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] + + v3_ll * normal_direction_ll[3] + B_dot_n_rr = B1_rr * normal_direction_average[1] + + B2_rr * normal_direction_average[2] + + B3_rr * normal_direction_average[3] + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) + f = SVector(0, + B1_ll * B_dot_n_rr, + B2_ll * B_dot_n_rr, + B3_ll * B_dot_n_rr, + v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, + v1_ll * B_dot_n_rr, + v2_ll * B_dot_n_rr, + v3_ll * B_dot_n_rr, + v_dot_n_ll * psi_rr) + + return f end - - """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations3D) @@ -323,90 +330,98 @@ Entropy conserving two-point flux by divergence diminishing ideal magnetohydrodynamics equations [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_mean = 0.5 * rho_avg / beta_avg - B1_avg = 0.5 * (B1_ll + B1_rr) - B2_avg = 0.5 * (B2_ll + B2_rr) - B3_avg = 0.5 * (B3_ll + B3_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) 
- mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) - vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean*v1_avg - f2 = f1*v1_avg + p_mean + 0.5*mag_norm_avg - B1_avg*B1_avg - f3 = f1*v2_avg - B1_avg*B2_avg - f4 = f1*v3_avg - B1_avg*B3_avg - f6 = equations.c_h*psi_avg - f7 = v1_avg*B2_avg - v2_avg*B1_avg - f8 = v1_avg*B3_avg - v3_avg*B1_avg - f9 = equations.c_h*B1_avg - # total energy flux is complicated and involves the previous eight components - psi_B1_avg = 0.5*(B1_ll*psi_ll + B1_rr*psi_rr) - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v1_mag_avg + - B1_avg*vel_dot_mag_avg - equations.c_h*psi_B1_avg) - elseif orientation == 2 - f1 = rho_mean*v2_avg - f2 = f1*v1_avg - B2_avg*B1_avg - f3 = f1*v2_avg + p_mean + 0.5*mag_norm_avg - B2_avg*B2_avg - f4 = f1*v3_avg - B2_avg*B3_avg - f6 = v2_avg*B1_avg - v1_avg*B2_avg - f7 = equations.c_h*psi_avg - f8 = v2_avg*B3_avg - v3_avg*B2_avg - f9 = equations.c_h*B2_avg - # total energy flux is complicated and involves the previous eight components - psi_B2_avg = 0.5*(B2_ll*psi_ll + B2_rr*psi_rr) - v2_mag_avg = 0.5*(v2_ll*mag_norm_ll + v2_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v2_mag_avg + - B2_avg*vel_dot_mag_avg - equations.c_h*psi_B2_avg) - else - f1 = rho_mean*v3_avg - f2 = f1*v1_avg - B3_avg*B1_avg - f3 = f1*v2_avg - B3_avg*B2_avg - f4 = f1*v3_avg + p_mean + 0.5*mag_norm_avg - B3_avg*B3_avg - f6 = v3_avg*B1_avg - v1_avg*B3_avg - f7 = v3_avg*B2_avg - v2_avg*B3_avg - f8 = equations.c_h*psi_avg - f9 = equations.c_h*B3_avg - # total energy flux is complicated and involves the previous eight components - psi_B3_avg = 0.5*(B3_ll*psi_ll + B3_rr*psi_rr) - v3_mag_avg = 0.5*(v3_ll*mag_norm_ll + v3_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v3_mag_avg + - B3_avg*vel_dot_mag_avg - equations.c_h*psi_B3_avg) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + B1_avg = 0.5 * (B1_ll 
+ B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + 0.5 * mag_norm_avg - B1_avg * B1_avg + f3 = f1 * v2_avg - B1_avg * B2_avg + f4 = f1 * v3_avg - B1_avg * B3_avg + f6 = equations.c_h * psi_avg + f7 = v1_avg * B2_avg - v2_avg * B1_avg + f8 = v1_avg * B3_avg - v3_avg * B1_avg + f9 = equations.c_h * B1_avg + # total energy flux is complicated and involves the previous eight components + psi_B1_avg = 0.5 * (B1_ll * psi_ll + B1_rr * psi_rr) + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg - equations.c_h * psi_B1_avg) + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - B2_avg * B1_avg + f3 = f1 * v2_avg + p_mean + 0.5 * mag_norm_avg - B2_avg * B2_avg + f4 = f1 * v3_avg - B2_avg * B3_avg + f6 = v2_avg * B1_avg - v1_avg * B2_avg + f7 = equations.c_h * psi_avg + f8 = v2_avg * B3_avg - v3_avg * B2_avg + f9 = equations.c_h * B2_avg + # total energy flux is complicated and involves the previous eight components + psi_B2_avg = 0.5 * (B2_ll * psi_ll + B2_rr * psi_rr) + v2_mag_avg = 0.5 * (v2_ll * mag_norm_ll + v2_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v2_mag_avg + + B2_avg * vel_dot_mag_avg - equations.c_h * psi_B2_avg) + else + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg - B3_avg * B1_avg + f3 = f1 * v2_avg - B3_avg * B2_avg + f4 = f1 * v3_avg + p_mean + 0.5 * mag_norm_avg - B3_avg * B3_avg + f6 = v3_avg * B1_avg - v1_avg * B3_avg + f7 = v3_avg * B2_avg - v2_avg * B3_avg + f8 = equations.c_h * psi_avg + f9 = equations.c_h * B3_avg + # total energy flux is complicated and involves the previous eight components + psi_B3_avg = 0.5 * (B3_ll * psi_ll + B3_rr * psi_rr) + v3_mag_avg = 0.5 * (v3_ll * mag_norm_ll + v3_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v3_mag_avg + + B3_avg * vel_dot_mag_avg - equations.c_h * psi_B3_avg) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations3D) @@ -429,198 +444,232 @@ Hindenlang and Gassner (2019), extending [`flux_ranocha`](@ref) to the MHD equat the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, 
rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = equations.c_h * psi_avg - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) - + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll) ) ) - elseif orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) - f3 = f1 * v2_avg + p_avg + magnetic_square_avg - 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) - #f5 below - f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) - f7 = equations.c_h * psi_avg - f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) - f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v2_rr + p_rr * v2_ll - + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) - + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) - - (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) - - (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) - + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll) ) ) - else # orientation == 3 - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - 0.5 * (B3_ll * B1_rr + B3_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B3_ll * B2_rr + B3_rr * B2_ll) - f4 = f1 * v3_avg + p_avg + magnetic_square_avg - 0.5 * (B3_ll * B3_rr + B3_rr * B3_ll) - #f5 below - f6 = 0.5 * (v3_ll * B1_ll - v1_ll * B3_ll + v3_rr * B1_rr - v1_rr * B3_rr) - f7 = 0.5 * (v3_ll * B2_ll - v2_ll * B3_ll + v3_rr * B2_rr - v2_rr * B3_rr) - f8 = equations.c_h * psi_avg - f9 = equations.c_h * 0.5 * (B3_ll + B3_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v3_rr + p_rr * v3_ll - + (v3_ll * B1_ll * B1_rr + v3_rr * B1_rr * B1_ll) - + (v3_ll * B2_ll * B2_rr + v3_rr * B2_rr * B2_ll) - - (v1_ll * B3_ll * B1_rr + v1_rr * B3_rr * B1_ll) - - (v2_ll * B3_ll * B2_rr + v2_rr * B3_rr * B2_ll) - + 
equations.c_h * (B3_ll * psi_rr + B3_rr * psi_ll) ) ) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = equations.c_h * psi_avg + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) + + + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll))) + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) + f3 = f1 * v2_avg + p_avg + magnetic_square_avg - + 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) + #f5 below + f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) + f7 = equations.c_h * psi_avg + f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) + f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v2_rr + p_rr * v2_ll + + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) + + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) + - + (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) + - + (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) + + + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll))) + else # orientation == 3 + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg - 0.5 * (B3_ll * B1_rr + B3_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B3_ll * B2_rr + B3_rr * B2_ll) + f4 = f1 * v3_avg + p_avg + magnetic_square_avg - + 0.5 * (B3_ll * B3_rr + B3_rr * B3_ll) + #f5 below + f6 = 0.5 * (v3_ll * B1_ll - v1_ll * B3_ll + v3_rr * B1_rr - v1_rr * B3_rr) + f7 = 0.5 * (v3_ll * 
B2_ll - v2_ll * B3_ll + v3_rr * B2_rr - v2_rr * B3_rr) + f8 = equations.c_h * psi_avg + f9 = equations.c_h * 0.5 * (B3_ll + B3_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v3_rr + p_rr * v3_ll + + (v3_ll * B1_ll * B1_rr + v3_rr * B1_rr * B1_ll) + + (v3_ll * B2_ll * B2_rr + v3_rr * B2_rr * B2_ll) + - + (v1_ll * B3_ll * B1_rr + v1_rr * B3_rr * B1_ll) + - + (v2_ll * B3_ll * B2_rr + v2_rr * B3_rr * B2_ll) + + + equations.c_h * (B3_ll * psi_rr + B3_rr * psi_ll))) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end -@inline function flux_hindenlang_gassner(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] - B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] + B3_ll * normal_direction[3] - B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] + B3_rr * normal_direction[3] - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = ( f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] - - 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll) ) - f3 = ( f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] - - 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll) ) - f4 = ( f1 * v3_avg + (p_avg + magnetic_square_avg) * normal_direction[3] - - 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll) ) - #f5 below - f6 = ( equations.c_h * psi_avg * normal_direction[1] - + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + - v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr) ) - f7 = ( equations.c_h * psi_avg * normal_direction[2] - + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + - v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr) ) - f8 = ( equations.c_h * psi_avg * normal_direction[3] - + 0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + - v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr) ) - f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v_dot_n_rr + p_rr * v_dot_n_ll - + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) - + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) - + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) - - 
(v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) - - (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) - - (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) - + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll) ) ) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux_hindenlang_gassner(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] + B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] + + B3_ll * normal_direction[3] + B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] + + B3_rr * normal_direction[3] + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = (f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] + - + 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll)) + f3 = (f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] + - + 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll)) + f4 = (f1 * v3_avg + (p_avg + magnetic_square_avg) * normal_direction[3] + - + 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll)) + #f5 below + f6 = (equations.c_h * psi_avg * normal_direction[1] + + + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + + v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr)) + f7 = (equations.c_h * psi_avg * normal_direction[2] + + + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + + v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr)) + f8 = (equations.c_h * psi_avg * normal_direction[3] + + + 0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + + v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr)) + f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v_dot_n_rr + p_rr * v_dot_n_ll + + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) + + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) + + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) + - + (v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) + - + (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) + - + (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) + + + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll))) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - # Calculate maximum wave speed for local 
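# A minimal sketch verifying the algebraic identity quoted in the comments of
# `flux_hindenlang_gassner` above, using plain reference definitions of the
# logarithmic means; the names `ln_mean_ref`/`inv_ln_mean_ref` are hypothetical,
# and the actual `ln_mean`/`inv_ln_mean` in Trixi.jl are numerically stabilized
# implementations of the same quantities.
ln_mean_ref(a, b) = (a - b) / log(a / b)
inv_ln_mean_ref(a, b) = log(a / b) / (a - b)
let rho_ll = 1.3, p_ll = 0.7, rho_rr = 0.9, p_rr = 1.1
    lhs = inv_ln_mean_ref(rho_ll / p_ll, rho_rr / p_rr)
    rhs = p_ll * p_rr * inv_ln_mean_ref(rho_ll * p_rr, rho_rr * p_ll)
    @assert isapprox(lhs, rhs, rtol = 1e-12)
end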
Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr - - # Calculate the left/right velocities and fast magnetoacoustic wave speeds - if orientation == 1 - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - elseif orientation == 2 - v_ll = rho_v2_ll / rho_ll - v_rr = rho_v2_rr / rho_rr - else # orientation == 3 - v_ll = rho_v3_ll / rho_ll - v_rr = rho_v3_rr / rho_rr - end - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr + + # Calculate the left/right velocities and fast magnetoacoustic wave speeds + if orientation == 1 + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + elseif orientation == 2 + v_ll = rho_v2_ll / rho_ll + v_rr = rho_v2_rr / rho_rr + else # orientation == 3 + v_ll = rho_v3_ll / rho_ll + v_rr = rho_v3_rr / rho_rr + end + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end @inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr - - # Calculate normal velocities and fast magnetoacoustic wave speeds - # left - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_ll = ( v1_ll * normal_direction[1] - + v2_ll * normal_direction[2] - + v3_ll * normal_direction[3] ) - cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) - # right - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - v_rr = ( v1_rr * normal_direction[1] - + v2_rr * normal_direction[2] - + v3_rr * normal_direction[3] ) - cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) - - # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref) - return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr + + # Calculate normal velocities and fast magnetoacoustic wave speeds + # left + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_ll = (v1_ll * normal_direction[1] + + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3]) + cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + # right + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + v_rr = (v1_rr * normal_direction[1] + + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3]) + cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + + # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref) + return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end - """ min_max_speed_naive(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations3D) @@ -629,270 +678,277 @@ Calculate minimum and maximum wave speeds for HLL-type fluxes as in An HLLC Riemann solver for magneto-hydrodynamics [DOI: 
10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020) """ -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr - - # Calculate primitive variables and speed of sound - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - - # Approximate the left-most and right-most eigenvalues in the Riemann fan - if orientation == 1 # x-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) - elseif orientation == 2 # y-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) - else # z-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v3_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v3_rr + c_f_rr, vel_roe + c_f_roe) - end - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr + + # Calculate primitive variables and speed of sound + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + + # Approximate the left-most and right-most eigenvalues in the Riemann fan + if orientation == 1 # x-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) + elseif orientation == 2 # y-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) + else # z-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v3_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v3_rr + c_f_rr, vel_roe + c_f_roe) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr - - # Calculate primitive velocity variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - - v1_rr = 
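# Minimal sketch of how the `max_abs_speed_naive` estimate above enters the local
# Lax-Friedrichs dissipation; `flux_llf_sketch` is a hypothetical helper name
# (Trixi.jl composes the same ingredients via `FluxLaxFriedrichs`).
function flux_llf_sketch(u_ll, u_rr, orientation, equations)
    λ = max_abs_speed_naive(u_ll, u_rr, orientation, equations)
    return 0.5 * (flux(u_ll, orientation, equations) +
                  flux(u_rr, orientation, equations)) - 0.5 * λ * (u_rr - u_ll)
end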
rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - - v_normal_ll = (v1_ll * normal_direction[1] + - v2_ll * normal_direction[2] + - v3_ll * normal_direction[3]) - v_normal_rr = (v1_rr * normal_direction[1] + - v2_rr * normal_direction[2] + - v3_rr * normal_direction[3]) - - c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) - c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) - v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations) - - # Estimate the min/max eigenvalues in the normal direction - λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe) - λ_max = max(v_normal_rr + c_f_rr, v_roe + c_f_roe) - - return λ_min, λ_max + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr + + # Calculate primitive velocity variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + + v_normal_ll = (v1_ll * normal_direction[1] + + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3]) + v_normal_rr = (v1_rr * normal_direction[1] + + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3]) + + c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations) + + # Estimate the min/max eigenvalues in the normal direction + λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe) + λ_max = max(v_normal_rr + c_f_rr, v_roe + c_f_roe) + + return λ_min, λ_max end
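# Minimal sketch of the standard HLL flux assembled from the `min_max_speed_naive`
# estimates above; `flux_hll_sketch` is a hypothetical name (Trixi.jl ships this
# logic as `flux_hll`).
function flux_hll_sketch(u_ll, u_rr, orientation, equations)
    λ_min, λ_max = min_max_speed_naive(u_ll, u_rr, orientation, equations)
    if λ_min >= 0
        return flux(u_ll, orientation, equations)
    elseif λ_max <= 0
        return flux(u_rr, orientation, equations)
    else
        f_ll = flux(u_ll, orientation, equations)
        f_rr = flux(u_rr, orientation, equations)
        return (λ_max * f_ll - λ_min * f_rr + λ_min * λ_max * (u_rr - u_ll)) /
               (λ_max - λ_min)
    end
end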
- # Rotate normal vector to x-axis; normal, tangent1 and tangent2 need to be orthonormal # Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions # have been normalized prior to this rotation of the state vector # Note, for ideal GLM-MHD only the velocities and magnetic field variables rotate -@inline function rotate_to_x(u, normal_vector, tangent1, tangent2, equations::IdealGlmMhdEquations3D) - # Multiply with [ 1 0 0 0 0 0 0 0 0; - # 0 ― normal_vector ― 0 0 0 0 0; - # 0 ― tangent1 ― 0 0 0 0 0; - # 0 ― tangent2 ― 0 0 0 0 0; - # 0 0 0 0 1 0 0 0 0; - # 0 0 0 0 0 ― normal_vector ― 0; - # 0 0 0 0 0 ― tangent1 ― 0; - # 0 0 0 0 0 ― tangent2 ― 0; - # 0 0 0 0 0 0 0 0 1 ] - return SVector(u[1], - normal_vector[1] * u[2] + normal_vector[2] * u[3] + normal_vector[3] * u[4], - tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] * u[4], - tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4], - u[5], - normal_vector[1] * u[6] + normal_vector[2] * u[7] + normal_vector[3] * u[8], - tangent1[1] * u[6] + tangent1[2] * u[7] + tangent1[3] * u[8], - tangent2[1] * u[6] + tangent2[2] * u[7] + tangent2[3] * u[8], - u[9]) - +@inline function rotate_to_x(u, normal_vector, tangent1, tangent2, + equations::IdealGlmMhdEquations3D) + # Multiply with [ 1 0 0 0 0 0 0 0 0; + # 0 ― normal_vector ― 0 0 0 0 0; + # 0 ― tangent1 ― 0 0 0 0 0; + # 0 ― tangent2 ― 0 0 0 0 0; + # 0 0 0 0 1 0 0 0 0; + # 0 0 0 0 0 ― normal_vector ― 0; + # 0 0 0 0 0 ― tangent1 ― 0; + # 0 0 0 0 0 ― tangent2 ― 0; + # 0 0 0 0 0 0 0 0 1 ] + return SVector(u[1], + normal_vector[1] * u[2] + normal_vector[2] * u[3] + + normal_vector[3] * u[4], + tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] * u[4], + tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4], + u[5], + normal_vector[1] * u[6] + normal_vector[2] * u[7] + + normal_vector[3] * u[8], + tangent1[1] * u[6] + tangent1[2] * u[7] + tangent1[3] * u[8], + tangent2[1] * u[6] + tangent2[2] * u[7] + tangent2[3] * u[8], + u[9]) end - # Rotate x-axis to normal vector; normal, tangent1 and tangent2 need to be orthonormal # Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions # have been normalized prior to this back-rotation of the state vector # Note, for ideal GLM-MHD only the velocities and magnetic field variables back-rotate -@inline function rotate_from_x(u, normal_vector, tangent1, tangent2, equations::IdealGlmMhdEquations3D) - # Multiply with [ 1 0 0 0 0 0 0 0 0; - # 0 | | | 0 0 0 0 0; - # 0 normal_vector tangent1 tangent2 0 0 0 0 0; - # 0 | | | 0 0 0 0 0; - # 0 0 0 0 1 0 0 0 0; - # 0 0 0 0 0 | | | 0; - # 0 0 0 0 0 normal_vector tangent1 tangent2 0; - # 0 0 0 0 0 | | | 0; - # 0 0 0 0 0 0 0 0 1 ] - return SVector(u[1], - normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4], - normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4], - normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4], - u[5], - normal_vector[1] * u[6] + tangent1[1] * u[7] + tangent2[1] * u[8], - normal_vector[2] * u[6] + tangent1[2] * u[7] + tangent2[2] * u[8], - normal_vector[3] * u[6] + tangent1[3] * u[7] + tangent2[3] * u[8], - u[9]) +@inline function rotate_from_x(u, normal_vector, tangent1, tangent2, + equations::IdealGlmMhdEquations3D) + # Multiply with [ 1 0 0 0 0 0 0 0 0; + # 0 | | | 0 0 0 0 0; + # 0 normal_vector tangent1 tangent2 0 0 0 0 0; + # 0 | | | 0 0 0 0 0; + # 0 0 0 0 1 0 0 0 0; + # 0 0 0 0 0 | | | 0; + # 0 0 0 0 0 normal_vector tangent1 tangent2 0; + # 0 0 0 0 0 | | | 0; + # 0 0 0 0 0 0 0 0 1 ] + return SVector(u[1], + normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4], + normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4], + normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4], + u[5], + normal_vector[1] * u[6] + tangent1[1] * u[7] + tangent2[1] * u[8], + normal_vector[2] * u[6] + tangent1[2] * u[7] + tangent2[2] * u[8], + normal_vector[3] * u[6] + tangent1[3] * u[7] + tangent2[3] * u[8], + u[9]) end - @inline function max_abs_speeds(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, _ = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) - cf_y_direction = calc_fast_wavespeed(u, 2, equations) - cf_z_direction = calc_fast_wavespeed(u, 3, equations) - - return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction, abs(v3) + cf_z_direction + rho, rho_v1, rho_v2, rho_v3, _ = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + cf_x_direction = calc_fast_wavespeed(u, 1, equations) + cf_y_direction = calc_fast_wavespeed(u, 2, equations) + cf_z_direction = calc_fast_wavespeed(u, 3, equations) + + return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction, abs(v3) + cf_z_direction end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 - + B1 * B1 + B2 * B2 + B3 * B3 - + psi * psi)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * (rho_e - + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 + + B1 * B1 + B2 * B2 + B3 * B3 + + psi * psi)) - return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi) +
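# Minimal sketch checking that `rotate_to_x` and `rotate_from_x` above are
# inverses of each other for an orthonormal triple; the frame and state below
# are hypothetical sample values.
let equations = IdealGlmMhdEquations3D(5 / 3)
    n = SVector(0.0, 0.0, 1.0)
    t1 = SVector(1.0, 0.0, 0.0)
    t2 = SVector(0.0, 1.0, 0.0)
    u = prim2cons(SVector(1.1, 0.1, -0.2, 0.3, 0.9, 1.0, 0.5, -0.25, 0.05), equations)
    u_rot = rotate_to_x(u, n, t1, t2, equations)
    @assert isapprox(rotate_from_x(u_rot, n, t1, t2, equations), u)
end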
return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = rho_p * v3 - w5 = -rho_p - w6 = rho_p * B1 - w7 = rho_p * B2 - w8 = rho_p * B3 - w9 = rho_p * psi - - return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p + + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = rho_p * v3 + w5 = -rho_p + w6 = rho_p * B1 + w7 = rho_p * B2 + w8 = rho_p * B3 + w9 = rho_p * psi + + return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdEquations3D) - rho, v1, v2, v3, p, B1, B2, B3, psi = prim + rho, v1, v2, v3, p, B1, B2, B3, psi = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p * equations.inv_gamma_minus_one + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - @inline function density(u, equations::IdealGlmMhdEquations3D) - return u[1] + return u[1] end @inline function pressure(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return p end @inline function density_pressure(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return rho * p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - 
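# Minimal round-trip sketch for the conversions above (hypothetical state values):
let equations = IdealGlmMhdEquations3D(5 / 3)
    prim = SVector(1.1, 0.1, -0.2, 0.3, 0.9, 1.0, 0.5, -0.25, 0.05)
    @assert isapprox(cons2prim(prim2cons(prim, equations), equations), prim)
end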
mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - if orientation == 1 # x-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - elseif orientation == 2 # y-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b2^2)) - else # z-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b3^2)) - end - return c_f +@inline function calc_fast_wavespeed(cons, orientation::Integer, + equations::IdealGlmMhdEquations3D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + if orientation == 1 # x-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + elseif orientation == 2 # y-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b2^2)) + else # z-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b3^2)) + end + return c_f end -@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2] + - normal_direction[3] * normal_direction[3]) - b_dot_n_squared = (b1 * normal_direction[1] + - b2 * normal_direction[2] + - b3 * normal_direction[3])^2 / norm_squared - - c_f = sqrt( - (0.5 * (a_square + b_square) + - 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * norm_squared) - return c_f +@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + norm_squared = (normal_direction[1] * normal_direction[1] + + normal_direction[2] * normal_direction[2] + + normal_direction[3] * normal_direction[3]) + b_dot_n_squared = (b1 * normal_direction[1] + + b2 * normal_direction[2] + + b3 * normal_direction[3])^2 / norm_squared + + c_f = 
sqrt((0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * + norm_squared) + return c_f end - """ calc_fast_wavespeed_roe(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations3D) @@ -902,201 +958,211 @@ Compute the fast magnetoacoustic wave speed using Roe averages as given by of Roe Matrices for Systems of Conservation Laws [DOI: 10.1006/jcph.1997.5773](https://doi.org/10.1006/jcph.1997.5773) """ -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) - mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll - p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - # finally compute the average wave speed and set the output velocity (depends on orientation) - if orientation == 1 # x-direction - c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v1_roe - elseif orientation == 2 # y-direction - c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v2_roe - else # 
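# The `calc_fast_wavespeed` variants above evaluate the closed-form fast
# magnetoacoustic speed
#     c_f^2 = 0.5 * (a^2 + b^2 + sqrt((a^2 + b^2)^2 - 4 * a^2 * b_n^2)),
# with sound speed a^2 = gamma * p / rho, scaled field b = B / sqrt(rho), and b_n
# the component of b along the chosen direction. For b aligned with the direction
# this reduces to c_f = max(a, |b_n|); for b orthogonal to it, to sqrt(a^2 + b^2).
# Minimal numeric cross-check with hypothetical values:
let equations = IdealGlmMhdEquations3D(5 / 3)
    u = prim2cons(SVector(1.0, 0.0, 0.0, 0.0, 0.6, 0.8, 0.3, 0.0, 0.0), equations)
    a2 = equations.gamma * 0.6 / 1.0
    bsq = 0.8^2 + 0.3^2
    c_f_closed = sqrt(0.5 * (a2 + bsq + sqrt((a2 + bsq)^2 - 4 * a2 * 0.8^2)))
    @assert isapprox(calc_fast_wavespeed(u, 1, equations), c_f_closed)
end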
z-direction - c_a_roe = B3_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v3_roe - end - - return vel_out_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) + mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll + p_ll = (equations.gamma - 1) * + (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) + mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr + p_rr = (equations.gamma - 1) * + (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2) + + # compute total pressure which is thermal + magnetic pressures + p_total_ll = p_ll + 0.5 * mag_norm_ll + p_total_rr = p_rr + 0.5 * mag_norm_rr + + # compute the Roe density averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) + inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) + rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add + rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add + # Roe averages + # velocities and magnetic fields + v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe + v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe + v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe + B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe + B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe + B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe + # enthalpy + H_ll = (rho_e_ll + p_total_ll) / rho_ll + H_rr = (rho_e_rr + p_total_rr) / rho_rr + H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe + # temporary variable see equation (4.12) in Cargo and Gallice + X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) * + inv_sqrt_rho_add^2 + # averaged components needed to compute c_f, the fast magnetoacoustic wave speed + b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum + a_square_roe = ((2.0 - equations.gamma) * X + + (equations.gamma - 1.0) * + (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) - + b_square_roe)) # acoustic speed + # finally compute the average wave speed and set the output velocity (depends on orientation) + if orientation == 1 # x-direction + c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed + a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - + 4.0 * a_square_roe * c_a_roe) + c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe)) + vel_out_roe = v1_roe + elseif orientation == 2 # y-direction + c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed + a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - + 4.0 * a_square_roe * c_a_roe) + c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe)) + vel_out_roe = v2_roe + else # z-direction + c_a_roe = B3_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed + a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - + 4.0 * a_square_roe
* c_a_roe) + c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe)) + vel_out_roe = v3_roe + end + + return vel_out_roe, c_f_roe end -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) - mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll - p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - - # finally compute the average wave speed and set the output velocity (depends on orientation) - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2] + - normal_direction[3] * normal_direction[3]) - B_roe_dot_n_squared = (B1_roe * normal_direction[1] + - B2_roe * normal_direction[2] + - B3_roe * normal_direction[3])^2 / norm_squared - - c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe) - c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared) - vel_out_roe = (v1_roe * normal_direction[1] + - v2_roe * normal_direction[2] + - v3_roe * normal_direction[3]) - - return vel_out_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + 
rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) + mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll + p_ll = (equations.gamma - 1) * + (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) + mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr + p_rr = (equations.gamma - 1) * + (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2) + + # compute total pressure which is thermal + magnetic pressures + p_total_ll = p_ll + 0.5 * mag_norm_ll + p_total_rr = p_rr + 0.5 * mag_norm_rr + + # compute the Roe density averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) + inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) + rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add + rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add + # Roe averages + # velocities and magnetic fields + v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe + v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe + v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe + B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe + B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe + B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe + # enthalpy + H_ll = (rho_e_ll + p_total_ll) / rho_ll + H_rr = (rho_e_rr + p_total_rr) / rho_rr + H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe + # temporary variable see equation (4.12) in Cargo and Gallice + X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) * + inv_sqrt_rho_add^2 + # averaged components needed to compute c_f, the fast magnetoacoustic wave speed + b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum + a_square_roe = ((2.0 - equations.gamma) * X + + (equations.gamma - 1.0) * + (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) - + b_square_roe)) # acoustic speed + + # finally compute the average wave speed and set the output velocity (depends on the normal direction) + norm_squared = (normal_direction[1] * normal_direction[1] + + normal_direction[2] * normal_direction[2] + + normal_direction[3] * normal_direction[3]) + B_roe_dot_n_squared = (B1_roe * normal_direction[1] + + B2_roe * normal_direction[2] + + B3_roe * normal_direction[3])^2 / norm_squared + + c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed + a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe) + c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared) + vel_out_roe = (v1_roe * normal_direction[1] + + v2_roe * normal_direction[2] + + v3_roe * normal_direction[3]) + + return vel_out_roe, c_f_roe end
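# The Roe averages used above are square-root-of-density weighted means: for any
# field phi, phi_roe = (sqrt(rho_ll) * phi_ll + sqrt(rho_rr) * phi_rr) /
# (sqrt(rho_ll) + sqrt(rho_rr)). Minimal sketch with a hypothetical helper name:
roe_mean(phi_ll, phi_rr, rho_ll, rho_rr) = (sqrt(rho_ll) * phi_ll +
                                            sqrt(rho_rr) * phi_rr) /
                                           (sqrt(rho_ll) + sqrt(rho_rr))
@assert roe_mean(2.0, 2.0, 1.0, 4.0) == 2.0 # consistency: equal states stay unchanged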
- # Calculate thermodynamic entropy for a conservative state `cons` @inline function entropy_thermodynamic(cons, equations::IdealGlmMhdEquations3D) - # Pressure - p = (equations.gamma - 1) * (cons[5] - 1/2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1] - - 1/2 * (cons[6]^2 + cons[7]^2 + cons[8]^2) - - 1/2 * cons[9]^2) - - # Thermodynamic entropy - s = log(p) - equations.gamma*log(cons[1]) - - return s + # Pressure + p = (equations.gamma - 1) * + (cons[5] - 1 / 2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1] + - + 1 / 2 * (cons[6]^2 + cons[7]^2 + cons[8]^2) + - + 1 / 2 * cons[9]^2) + + # Thermodynamic entropy + s = log(p) - equations.gamma * log(cons[1]) + + return s end - # Calculate mathematical entropy for a conservative state `cons` @inline function entropy_math(cons, equations::IdealGlmMhdEquations3D) - S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one + S = -entropy_thermodynamic(cons, equations) * cons[1] * + equations.inv_gamma_minus_one - return S + return S end - # Default entropy is the mathematical entropy @inline entropy(cons, equations::IdealGlmMhdEquations3D) = entropy_math(cons, equations) - # Calculate total energy for a conservative state `cons` @inline energy_total(cons, ::IdealGlmMhdEquations3D) = cons[5] - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(cons, equations::IdealGlmMhdEquations3D) - return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2)/cons[1] + return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1] end - # Calculate the magnetic energy for a conservative state `cons`. # OBS! For non-dimensional form of the ideal MHD magnetic pressure ≡ magnetic energy @inline function energy_magnetic(cons, ::IdealGlmMhdEquations3D) - return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2) + return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2) end - # Calculate internal energy for a conservative state `cons` @inline function energy_internal(cons, equations::IdealGlmMhdEquations3D) - return (energy_total(cons, equations) - - energy_kinetic(cons, equations) - - energy_magnetic(cons, equations) - - cons[9]^2 / 2) + return (energy_total(cons, equations) + - + energy_kinetic(cons, equations) + - + energy_magnetic(cons, equations) + - + cons[9]^2 / 2) end - # Calculate the cross helicity (\vec{v}⋅\vec{B}) for a conservative state `cons` @inline function cross_helicity(cons, ::IdealGlmMhdEquations3D) - return (cons[2]*cons[6] + cons[3]*cons[7] + cons[4]*cons[8]) / cons[1] + return (cons[2] * cons[6] + cons[3] * cons[7] + cons[4] * cons[8]) / cons[1] end - - end # @muladd diff --git a/src/equations/ideal_glm_mhd_multicomponent_1d.jl b/src/equations/ideal_glm_mhd_multicomponent_1d.jl index 59cb9bdfad8..0efa6426448 100644 --- a/src/equations/ideal_glm_mhd_multicomponent_1d.jl +++ b/src/equations/ideal_glm_mhd_multicomponent_1d.jl @@ -3,88 +3,107 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" IdealGlmMhdMulticomponentEquations1D The ideal compressible multicomponent GLM-MHD equations in one space dimension.
""" -mutable struct IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT<:Real} <: AbstractIdealGlmMhdMulticomponentEquations{1, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - - function IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - - new(gammas, gas_constants, cv, cp) - end +mutable struct IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT <: Real} <: + AbstractIdealGlmMhdMulticomponentEquations{1, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + + function IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + + new(gammas, gas_constants, cv, cp) + end end function IdealGlmMhdMulticomponentEquations1D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) - - NVARS = length(_gammas) + 7 - NCOMP = length(_gammas) + NVARS = length(_gammas) + 7 + NCOMP = length(_gammas) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - return IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end -@inline Base.real(::IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT +@inline function Base.real(::IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT +end have_nonconservative_terms(::IdealGlmMhdMulticomponentEquations1D) = False() function varnames(::typeof(cons2cons), equations::IdealGlmMhdMulticomponentEquations1D) - - cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) + cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) end function varnames(::typeof(cons2prim), equations::IdealGlmMhdMulticomponentEquations1D) - - prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) + prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) 
end - """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations1D) An Alfvén wave as smooth initial condition used for convergence tests. """ -function initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations1D) - # smooth Alfvén wave test from Derigs et al. FLASH (2016) - # domain must be set to [0, 1], γ = 5/3 - - rho = 1.0 - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) - v1 = 0.0 - si, co = sincos(2 * pi * x[1]) - v2 = 0.1 * si - v3 = 0.1 * co - p = 0.1 - B1 = 1.0 - B2 = v2 - B3 = v3 - prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3) - return prim2cons(vcat(prim_other, prim_rho), equations) -end +function initial_condition_convergence_test(x, t, + equations::IdealGlmMhdMulticomponentEquations1D) + # smooth Alfvén wave test from Derigs et al. FLASH (2016) + # domain must be set to [0, 1], γ = 5/3 + rho = 1.0 + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) + v1 = 0.0 + si, co = sincos(2 * pi * x[1]) + v2 = 0.1 * si + v3 = 0.1 * co + p = 0.1 + B1 = 1.0 + B2 = v2 + B3 = v3 + prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3) + return prim2cons(vcat(prim_other, prim_rho), equations) +end """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations1D) @@ -94,63 +113,70 @@ A weak blast wave adapted from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations1D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = (0) - x_norm = x[1] - inicenter[1] - r = sqrt(x_norm^2) - phi = atan(x_norm) - - # Calculate primitive variables - if r > 0.5 - rho = 1.0 - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) - else - rho = 1.1691 - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) - end - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) - p = r > 0.5 ? 1.0 : 1.245 - - prim_other = SVector{7, real(equations)}(v1, 0.0, 0.0, p, 1.0, 1.0, 1.0) - - return prim2cons(vcat(prim_other, prim_rho), equations) +function initial_condition_weak_blast_wave(x, t, + equations::IdealGlmMhdMulticomponentEquations1D) + # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Same discontinuity in the velocities but with magnetic fields + # Set up polar coordinates + inicenter = (0) + x_norm = x[1] - inicenter[1] + r = sqrt(x_norm^2) + phi = atan(x_norm) + + # Calculate primitive variables + if r > 0.5 + rho = 1.0 + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * + (1 - 2) / (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) + else + rho = 1.1691 + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * + (1 - 2) / (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) + end + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) + p = r > 0.5 ? 
1.0 : 1.245 + + prim_other = SVector{7, real(equations)}(v1, 0.0, 0.0, p, 1.0, 1.0, 1.0) + + return prim2cons(vcat(prim_other, prim_rho), equations) end - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - - rho = density(u, equations) - - v1 = rho_v1/rho - v2 = rho_v2/rho - v3 = rho_v3/rho - kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2) - mag_en = 0.5*(B1^2 + B2^2 + B3^2) - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - kin_en - mag_en) - - - f_rho = densities(u, v1, equations) - f1 = rho_v1*v1 + p + mag_en - B1^2 - f2 = rho_v1*v2 - B1*B2 - f3 = rho_v1*v3 - B1*B3 - f4 = (kin_en + gamma*p/(gamma - 1) + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) - f5 = 0.0 - f6 = v1*B2 - v2*B1 - f7 = v1*B3 - v3*B1 - - - f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7) - - return vcat(f_other, f_rho) +@inline function flux(u, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations1D) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + + rho = density(u, equations) + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2) + mag_en = 0.5 * (B1^2 + B2^2 + B3^2) + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - kin_en - mag_en) + + f_rho = densities(u, v1, equations) + f1 = rho_v1 * v1 + p + mag_en - B1^2 + f2 = rho_v1 * v2 - B1 * B2 + f3 = rho_v1 * v3 - B1 * B3 + f4 = (kin_en + gamma * p / (gamma - 1) + 2 * mag_en) * v1 - + B1 * (v1 * B1 + v2 * B2 + v3 * B3) + f5 = 0.0 + f6 = v1 * B2 - v2 * B1 + f7 = v1 * B3 - v3 * B1 + + f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7) + + return vcat(f_other, f_rho) end - """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations1D) @@ -160,92 +186,97 @@ Entropy conserving two-point flux adapted by divergence diminishing ideal magnetohydrodynamics equations for multicomponent [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D) - # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) - rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr - @unpack gammas, gas_constants, cv = equations - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+7], u_rr[i+7]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+7] + u_rr[i+7]) for i in eachcomponent(equations)) - - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - v1_avg = 0.5*(v1_ll+v1_rr) - v2_avg = 0.5*(v2_ll+v2_rr) - v3_avg = 0.5*(v3_ll+v3_rr) - v_sum = v1_avg + 
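# The coefficients 2^(i - 1) * (1 - 2) / (1 - 2^ncomponents) used above simplify
# to 2^(i - 1) / (2^ncomponents - 1), so the partial densities sum to the total
# density rho because sum_{i=1}^{n} 2^(i - 1) = 2^n - 1. Quick check with
# hypothetical n and rho:
let n = 3, rho = 1.0
    partial = [2^(i - 1) * (1 - 2) / (1 - 2^n) * rho for i in 1:n]
    @assert isapprox(sum(partial), rho)
end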
v2_avg + v3_avg - B1_avg = 0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+7] * cv[i] - help1_rr += u_rr[i+7] * cv[i] - end - - T_ll = (rho_e_ll - 0.5*rho_ll * (vel_norm_ll) - 0.5*mag_norm_ll) / help1_ll - T_rr = (rho_e_rr - 0.5*rho_rr * (vel_norm_rr) - 0.5*mag_norm_rr) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation with specific direction averages - help1 = zero(T_ll) - help2 = zero(T_rr) - - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] - end - f1 = help2 * v1_avg + enth/T + 0.5 * mag_norm_avg - B1_avg*B1_avg - f2 = help2 * v2_avg - B1_avg*B2_avg - f3 = help2 * v3_avg - B1_avg*B3_avg - f5 = 0.0 - f6 = v1_avg*B2_avg - v2_avg*B1_avg - f7 = v1_avg*B3_avg - v3_avg*B1_avg - - # total energy flux is complicated and involves the previous eight components - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - - f4 = (help1/T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + f3 * v3_avg + - f5 * B1_avg + f6 * B2_avg + f7 * B3_avg - 0.5*v1_mag_avg + - B1_avg * vel_dot_mag_avg - - - f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7) - - return vcat(f_other, f_rho) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations1D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr + @unpack gammas, gas_constants, cv = equations + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 7], + u_rr[i + 7]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 7] + + u_rr[i + 7]) + for i in eachcomponent(equations)) + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_sum = v1_avg + v2_avg + v3_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + enth = zero(v_sum) + help1_ll = zero(v1_ll) 
+ help1_rr = zero(v1_rr) + + for i in eachcomponent(equations) + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 7] * cv[i] + help1_rr += u_rr[i + 7] * cv[i] + end + + T_ll = (rho_e_ll - 0.5 * rho_ll * (vel_norm_ll) - 0.5 * mag_norm_ll) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (vel_norm_rr) - 0.5 * mag_norm_rr) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) + + # Calculate fluxes depending on orientation with specific direction averages + help1 = zero(T_ll) + help2 = zero(T_rr) + + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = help2 * v1_avg + enth / T + 0.5 * mag_norm_avg - B1_avg * B1_avg + f2 = help2 * v2_avg - B1_avg * B2_avg + f3 = help2 * v3_avg - B1_avg * B3_avg + f5 = 0.0 + f6 = v1_avg * B2_avg - v2_avg * B1_avg + f7 = v1_avg * B3_avg - v3_avg * B1_avg + + # total energy flux is complicated and involves the previous eight components + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + + f4 = (help1 / T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + + f3 * v3_avg + + f5 * B1_avg + f6 * B2_avg + f7 * B3_avg - 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg + + f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7) + + return vcat(f_other, f_rho) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdMulticomponentEquations1D) @@ -267,239 +298,250 @@ Hindenlang (2019), extending [`flux_ranocha`](@ref) to the MHD equations. the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D) - # Unpack left and right states - v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, equations) - v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Compute the necessary mean values needed for either direction - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) - - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+7], u_rr[i+7]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+7] + u_rr[i+7]) for i in eachcomponent(equations)) - - f1 = zero(rho_ll) - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - f1 += f_rho[i] - end - - # Calculate fluxes depending on orientation with specific direction averages - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 
0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = 0.0 - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) ) ) - - f_other = SVector{7, real(equations)}(f2, f3, f4, f5, f6, f7, f8) - - return vcat(f_other, f_rho) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations1D) + # Unpack left and right states + v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, equations) + v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Compute the necessary mean values needed for either direction + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 7], + u_rr[i + 7]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 7] + + u_rr[i + 7]) + for i in eachcomponent(equations)) + + f1 = zero(rho_ll) + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f1 += f_rho[i] + end + + # Calculate fluxes depending on orientation with specific direction averages + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = 0.0 + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll))) + + f_other = SVector{7, real(equations)}(f2, f3, f4, f5, f6, f7, f8) + + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1_ll, _ = u_ll - 
rho_v1_rr, _ = u_rr - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculate velocities (ignore orientation since it is always "1" in 1D) - # and fast magnetoacoustic wave speeds - # left - v_ll = rho_v1_ll / rho_ll - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - # right - v_rr = rho_v1_rr / rho_rr - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations1D) + rho_v1_ll, _ = u_ll + rho_v1_rr, _ = u_rr + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculate velocities (ignore orientation since it is always "1" in 1D) + # and fast magnetoacoustic wave speeds + # left + v_ll = rho_v1_ll / rho_ll + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + # right + v_rr = rho_v1_rr / rho_rr + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end - @inline function max_abs_speeds(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, _ = u + rho_v1, _ = u - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1 / rho + v1 = rho_v1 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) + cf_x_direction = calc_fast_wavespeed(u, 1, equations) - return (abs(v1) + cf_x_direction, ) + return (abs(v1) + cf_x_direction,) end - # Convert conservative variables to primitive function cons2prim(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+7] for i in eachcomponent(equations)) - rho = density(u, equations) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 7] + for i in eachcomponent(equations)) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho - gamma = totalgamma(u, equations) + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*(v1^2 + v2^2 + v3^2) - 0.5*(B1^2 + B2^2 + B3^2)) - prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3) - return vcat(prim_other, prim_rho) + p = (gamma - 1) * + (rho_e - 0.5 * rho * (v1^2 + v2^2 + v3^2) - 0.5 * (B1^2 + B2^2 + B3^2)) + prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3) + return vcat(prim_other, prim_rho) end # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - @unpack cv, gammas, gas_constants = equations - - rho = density(u, equations) - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2)) - s = log(p) - gamma*log(rho) - rho_p = rho / p - - # Multicomponent stuff - help1 = zero(v1) - - for i in eachcomponent(equations) - help1 += u[i+7] * cv[i] - end - - T = (rho_e - 0.5 * rho * v_square - 0.5*(B1^2 + B2^2 + B3^2)) / (help1) - - entrop_rho = SVector{ncomponents(equations), real(equations)}( -1.0 * (cv[i] * log(T) - gas_constants[i] * log(u[i+7])) + gas_constants[i] + cv[i] - (v_square / (2*T)) for i in eachcomponent(equations)) - - w1 = v1 / T - w2 = v2 / T - w3 = v3 / T - w4 = -1.0 / T - w5 = B1 / T - w6 = B2 / T 
- w7 = B3 / T - - entrop_other = SVector{7, real(equations)}(w1, w2, w3, w4, w5, w6, w7) - - return vcat(entrop_other, entrop_rho) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + @unpack cv, gammas, gas_constants = equations + + rho = density(u, equations) + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2)) + s = log(p) - gamma * log(rho) + rho_p = rho / p + + # Multicomponent stuff + help1 = zero(v1) + + for i in eachcomponent(equations) + help1 += u[i + 7] * cv[i] + end + + T = (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2)) / (help1) + + entrop_rho = SVector{ncomponents(equations), real(equations)}(-1.0 * + (cv[i] * log(T) - + gas_constants[i] * + log(u[i + 7])) + + gas_constants[i] + + cv[i] - + (v_square / (2 * T)) + for i in eachcomponent(equations)) + + w1 = v1 / T + w2 = v2 / T + w3 = v3 / T + w4 = -1.0 / T + w5 = B1 / T + w6 = B2 / T + w7 = B3 / T + + entrop_other = SVector{7, real(equations)}(w1, w2, w3, w4, w5, w6, w7) + + return vcat(entrop_other, entrop_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdMulticomponentEquations1D) - v1, v2, v3, p, B1, B2, B3 = prim + v1, v2, v3, p, B1, B2, B3 = prim - cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i+7] for i in eachcomponent(equations)) - rho = density(prim, equations) + cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i + 7] + for i in eachcomponent(equations)) + rho = density(prim, equations) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 - gamma = totalgamma(prim, equations) - rho_e = p/(gamma-1) + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + gamma = totalgamma(prim, equations) + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) - cons_other = SVector{7, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) + cons_other = SVector{7, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - @inline function density_pressure(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - rho = density(u, equations) - gamma = totalgamma(u, equations) - p = (gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - ) - return rho * p + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + rho = density(u, equations) + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2)) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, direction, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons - rho = density(cons, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_mag = sqrt(v1^2 + v2^2 + v3^2) - gamma = totalgamma(cons, equations) - p = (gamma - 1)*(rho_e - 0.5*rho*v_mag^2 - 0.5*(B1^2 + B2^2 + B3^2)) - a_square = gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1^2 + b2^2 + b3^2 - - c_f = sqrt(0.5*(a_square + b_square) + 
0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - - return c_f +@inline function calc_fast_wavespeed(cons, direction, + equations::IdealGlmMhdMulticomponentEquations1D) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons + rho = density(cons, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_mag = sqrt(v1^2 + v2^2 + v3^2) + gamma = totalgamma(cons, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * v_mag^2 - 0.5 * (B1^2 + B2^2 + B3^2)) + a_square = gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1^2 + b2^2 + b3^2 + + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + + return c_f end - @inline function density(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho = zero(u[1]) - - for i in eachcomponent(equations) - rho += u[i+7] - end + rho = zero(u[1]) - return rho - end + for i in eachcomponent(equations) + rho += u[i + 7] + end + return rho +end - @inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations1D) - @unpack cv, gammas = equations +@inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations1D) + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+7] * cv[i] * gammas[i] - help2 += u[i+7] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 7] * cv[i] * gammas[i] + help2 += u[i + 7] * cv[i] + end - return help1/help2 + return help1 / help2 end - @inline function densities(u, v, equations::IdealGlmMhdMulticomponentEquations1D) - - return SVector{ncomponents(equations), real(equations)}(u[i+7]*v for i in eachcomponent(equations)) - end - - + return SVector{ncomponents(equations), real(equations)}(u[i + 7] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/ideal_glm_mhd_multicomponent_2d.jl b/src/equations/ideal_glm_mhd_multicomponent_2d.jl index 3403341b47e..9b0eeb411e8 100644 --- a/src/equations/ideal_glm_mhd_multicomponent_2d.jl +++ b/src/equations/ideal_glm_mhd_multicomponent_2d.jl @@ -3,95 +3,116 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" IdealGlmMhdMulticomponentEquations2D The ideal compressible multicomponent GLM-MHD equations in two space dimensions. 
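Each component `i` is characterized by its heat capacity ratio `gammas[i]` and its
specific gas constant `gas_constants[i]`, passed as keyword tuples with one entry per
component; the constructor derives `cv` and `cp` from them. As a construction sketch
(the two-component values below are illustrative only, not taken from this patch):

    IdealGlmMhdMulticomponentEquations2D(gammas = (2.0, 4.0),
                                         gas_constants = (2.0, 2.0))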
""" -mutable struct IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT<:Real} <: AbstractIdealGlmMhdMulticomponentEquations{2, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - c_h ::RealT # GLM cleaning speed - - function IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - c_h = convert(eltype(gammas), NaN) - - new(gammas, gas_constants, cv, cp, c_h) - end +mutable struct IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT <: Real} <: + AbstractIdealGlmMhdMulticomponentEquations{2, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + c_h::RealT # GLM cleaning speed + + function IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + c_h = convert(eltype(gammas), NaN) + + new(gammas, gas_constants, cv, cp, c_h) + end end function IdealGlmMhdMulticomponentEquations2D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) + NVARS = length(_gammas) + 8 + NCOMP = length(_gammas) - NVARS = length(_gammas) + 8 - NCOMP = length(_gammas) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) - - return IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end -@inline Base.real(::IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT +@inline function Base.real(::IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT +end have_nonconservative_terms(::IdealGlmMhdMulticomponentEquations2D) = True() function varnames(::typeof(cons2cons), equations::IdealGlmMhdMulticomponentEquations2D) - - cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) + cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) end function varnames(::typeof(cons2prim), equations::IdealGlmMhdMulticomponentEquations2D) - - prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) 
+ prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) end -default_analysis_integrals(::IdealGlmMhdMulticomponentEquations2D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function default_analysis_integrals(::IdealGlmMhdMulticomponentEquations2D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations2D) An Alfvén wave as smooth initial condition used for convergence tests. """ -function initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations2D) - # smooth Alfvén wave test from Derigs et al. FLASH (2016) - # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 - alpha = 0.25*pi - x_perp = x[1]*cos(alpha) + x[2]*sin(alpha) - B_perp = 0.1*sin(2.0*pi*x_perp) - rho = 1 - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) - v1 = -B_perp*sin(alpha) - v2 = B_perp*cos(alpha) - v3 = 0.1*cos(2.0*pi*x_perp) - p = 0.1 - B1 = cos(alpha) + v1 - B2 = sin(alpha) + v2 - B3 = v3 - psi = 0.0 - prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) - return prim2cons(vcat(prim_other, prim_rho), equations) +function initial_condition_convergence_test(x, t, + equations::IdealGlmMhdMulticomponentEquations2D) + # smooth Alfvén wave test from Derigs et al. FLASH (2016) + # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 + alpha = 0.25 * pi + x_perp = x[1] * cos(alpha) + x[2] * sin(alpha) + B_perp = 0.1 * sin(2.0 * pi * x_perp) + rho = 1 + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) + v1 = -B_perp * sin(alpha) + v2 = B_perp * cos(alpha) + v3 = 0.1 * cos(2.0 * pi * x_perp) + p = 0.1 + B1 = cos(alpha) + v1 + B2 = sin(alpha) + v2 + B3 = v3 + psi = 0.0 + prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) + return prim2cons(vcat(prim_other, prim_rho), equations) end - """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations2D) @@ -100,72 +121,82 @@ A weak blast wave adapted from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations2D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = SVector(0.0, 0.0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - sin_phi, cos_phi = sincos(phi) - - prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.0 : 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.1691 for i in eachcomponent(equations)) - - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi - p = r > 0.5 ? 
1.0 : 1.245 - - prim_other = SVector{8, real(equations)}(v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0) - - return prim2cons(vcat(prim_other, prim_rho),equations) +function initial_condition_weak_blast_wave(x, t, + equations::IdealGlmMhdMulticomponentEquations2D) + # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Same discontinuity in the velocities but with magnetic fields + # Set up polar coordinates + inicenter = SVector(0.0, 0.0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + sin_phi, cos_phi = sincos(phi) + + prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? + 2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + 1.0 : + 2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + 1.1691 + for i in eachcomponent(equations)) + + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi + p = r > 0.5 ? 1.0 : 1.245 + + prim_other = SVector{8, real(equations)}(v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0) + + return prim2cons(vcat(prim_other, prim_rho), equations) end - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - @unpack c_h = equations - - rho = density(u, equations) - - v1 = rho_v1/rho - v2 = rho_v2/rho - v3 = rho_v3/rho - kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2) - mag_en = 0.5*(B1^2 + B2^2 + B3^2) - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - kin_en - mag_en - 0.5*psi^2) - - if orientation == 1 - f_rho = densities(u, v1, equations) - f1 = rho_v1*v1 + p + mag_en - B1^2 - f2 = rho_v1*v2 - B1*B2 - f3 = rho_v1*v3 - B1*B3 - f4 = (kin_en + gamma*p/(gamma - 1) + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) + c_h*psi*B1 - f5 = c_h*psi - f6 = v1*B2 - v2*B1 - f7 = v1*B3 - v3*B1 - f8 = c_h*B1 - else # orientation == 2 - f_rho = densities(u, v2, equations) - f1 = rho_v2*v1 - B1*B2 - f2 = rho_v2*v2 + p + mag_en - B2^2 - f3 = rho_v2*v3 - B2*B3 - f4 = (kin_en + gamma*p/(gamma - 1) + 2*mag_en)*v2 - B2*(v1*B1 + v2*B2 + v3*B3) + c_h*psi*B2 - f5 = v2*B1 - v1*B2 - f6 = c_h*psi - f7 = v2*B3 - v3*B2 - f8 = c_h*B2 - end - - f_other = SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8) - - return vcat(f_other, f_rho) -end +@inline function flux(u, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations2D) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + @unpack c_h = equations + + rho = density(u, equations) + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2) + mag_en = 0.5 * (B1^2 + B2^2 + B3^2) + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + + if orientation == 1 + f_rho = densities(u, v1, equations) + f1 = rho_v1 * v1 + p + mag_en - B1^2 + f2 = rho_v1 * v2 - B1 * B2 + f3 = rho_v1 * v3 - B1 * B3 + f4 = (kin_en + gamma * p / (gamma - 1) + 2 * mag_en) * v1 - + B1 * (v1 * B1 + v2 * B2 + v3 * B3) + c_h * psi * B1 + f5 = c_h * psi + f6 = v1 * B2 - v2 * B1 + f7 = v1 * B3 - v3 * B1 + f8 = c_h * B1 + else # orientation == 2 + f_rho = densities(u, v2, equations) + f1 = rho_v2 * v1 - B1 * B2 + f2 = rho_v2 * v2 + p + mag_en - B2^2 + f3 = rho_v2 * v3 - B2 * B3 + f4 = (kin_en + gamma * p / (gamma - 1) + 2 * mag_en) * v2 - + B2 * (v1 * B1 + v2 * B2 + v3 * B3) + c_h * psi * B2 + f5 = v2 * B1 - v1 * B2 + f6 = c_h * psi + f7 = v2 * B3 - v3 * B2 + f8 = c_h * B2 + end + f_other = 
SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8) + return vcat(f_other, f_rho) +end """ flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, @@ -184,47 +215,46 @@ of the [`IdealGlmMhdMulticomponentEquations2D`](@ref). """ @inline function flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - rho_ll = density(u_ll, equations) - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) - # Note that the order of conserved variables is changed compared to the - # standard GLM MHD equations, i.e., the densities are moved to the end - # Here, we compute the non-density components at first and append zero density - # components afterwards - zero_densities = SVector{ncomponents(equations), real(equations)}( - ntuple(_ -> zero(real(equations)), Val(ncomponents(equations)))) - if orientation == 1 - f = SVector(B1_ll * B1_rr, - B2_ll * B1_rr, - B3_ll * B1_rr, - v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, - v1_ll * B1_rr, - v2_ll * B1_rr, - v3_ll * B1_rr, - v1_ll * psi_rr) - else # orientation == 2 - f = SVector(B1_ll * B2_rr, - B2_ll * B2_rr, - B3_ll * B2_rr, - v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, - v1_ll * B2_rr, - v2_ll * B2_rr, - v3_ll * B2_rr, - v2_ll * psi_rr) - end - - return vcat(f, zero_densities) -end + rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + rho_ll = density(u_ll, equations) + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) + # Note that the order of conserved variables is changed compared to the + # standard GLM MHD equations, i.e., the densities are moved to the end + # Here, we compute the non-density components at first and append zero density + # components afterwards + zero_densities = SVector{ncomponents(equations), real(equations)}(ntuple(_ -> zero(real(equations)), + Val(ncomponents(equations)))) + if orientation == 1 + f = SVector(B1_ll * B1_rr, + B2_ll * B1_rr, + B3_ll * B1_rr, + v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, + v1_ll * B1_rr, + v2_ll * B1_rr, + v3_ll * B1_rr, + v1_ll * psi_rr) + else # orientation == 2 + f = SVector(B1_ll * B2_rr, + B2_ll * B2_rr, + B3_ll * B2_rr, + v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, + v1_ll * B2_rr, + v2_ll * B2_rr, + v3_ll * B2_rr, + v2_ll * psi_rr) + end + return vcat(f, zero_densities) +end """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdMulticomponentEquations2D) @@ -235,125 +265,134 @@ Entropy conserving two-point flux adapted by divergence diminishing ideal magnetohydrodynamics equations for multicomponent [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - # Unpack left and right states to get velocities, pressure, 
and inverse temperature (called beta) - rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - @unpack gammas, gas_constants, cv, c_h = equations - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+8], u_rr[i+8]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+8] + u_rr[i+8]) for i in eachcomponent(equations)) - - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - v1_sq = 0.5 * (v1_ll^2 + v1_rr^2) - v2_sq = 0.5 * (v2_ll^2 + v2_rr^2) - v3_sq = 0.5 * (v3_ll^2 + v3_rr^2) - v_sq = v1_sq + v2_sq + v3_sq - B1_sq = 0.5 * (B1_ll^2 + B1_rr^2) - B2_sq = 0.5 * (B2_ll^2 + B2_rr^2) - B3_sq = 0.5 * (B3_ll^2 + B3_rr^2) - B_sq = B1_sq + B2_sq + B3_sq - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - v1_avg = 0.5*(v1_ll+v1_rr) - v2_avg = 0.5*(v2_ll+v2_rr) - v3_avg = 0.5*(v3_ll+v3_rr) - v_sum = v1_avg + v2_avg + v3_avg - B1_avg = 0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - psi_avg = 0.5*(psi_ll+psi_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+8] * cv[i] - help1_rr += u_rr[i+8] * cv[i] - end - - T_ll = (rho_e_ll - 0.5*rho_ll * (vel_norm_ll) - 0.5*mag_norm_ll - 0.5*psi_ll^2) / help1_ll - T_rr = (rho_e_rr - 0.5*rho_rr * (vel_norm_rr) - 0.5*mag_norm_rr - 0.5*psi_rr^2) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation with specific direction averages - help1 = zero(T_ll) - help2 = zero(T_rr) - if orientation == 1 - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] - end - f1 = help2 * v1_avg + enth/T + 0.5 * mag_norm_avg - B1_avg*B1_avg - f2 = help2 * v2_avg - B1_avg*B2_avg - f3 = help2 * v3_avg - B1_avg*B3_avg - f5 = c_h*psi_avg - f6 = v1_avg*B2_avg - v2_avg*B1_avg - f7 = v1_avg*B3_avg - v3_avg*B1_avg - f8 = c_h*B1_avg - # total energy flux is complicated and involves the previous eight components - psi_B1_avg = 0.5*(B1_ll*psi_ll + B1_rr*psi_rr) - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - - f4 = (help1/T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + f3 * v3_avg + - f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5*v1_mag_avg + - B1_avg * vel_dot_mag_avg - c_h * psi_B1_avg - - else - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v2_avg for i in eachcomponent(equations)) +function flux_derigs_etal(u_ll, u_rr, 
orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations2D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + @unpack gammas, gas_constants, cv, c_h = equations + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 8], + u_rr[i + 8]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 8] + + u_rr[i + 8]) + for i in eachcomponent(equations)) + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + v1_sq = 0.5 * (v1_ll^2 + v1_rr^2) + v2_sq = 0.5 * (v2_ll^2 + v2_rr^2) + v3_sq = 0.5 * (v3_ll^2 + v3_rr^2) + v_sq = v1_sq + v2_sq + v3_sq + B1_sq = 0.5 * (B1_ll^2 + B1_rr^2) + B2_sq = 0.5 * (B2_ll^2 + B2_rr^2) + B3_sq = 0.5 * (B3_ll^2 + B3_rr^2) + B_sq = B1_sq + B2_sq + B3_sq + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_sum = v1_avg + v2_avg + v3_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + enth = zero(v_sum) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 8] * cv[i] + help1_rr += u_rr[i + 8] * cv[i] end - f1 = help2 * v1_avg - B1_avg*B2_avg - f2 = help2 * v2_avg + enth/T + 0.5 * mag_norm_avg - B2_avg*B2_avg - f3 = help2 * v3_avg - B2_avg*B3_avg - f5 = v2_avg*B1_avg - v1_avg*B2_avg - f6 = c_h*psi_avg - f7 = v2_avg*B3_avg - v3_avg*B2_avg - f8 = c_h*B2_avg - # total energy flux is complicated and involves the previous eight components - psi_B2_avg = 0.5*(B2_ll*psi_ll + B2_rr*psi_rr) - v2_mag_avg = 0.5*(v2_ll*mag_norm_ll + v2_rr*mag_norm_rr) + T_ll = (rho_e_ll - 0.5 * rho_ll * (vel_norm_ll) - 0.5 * mag_norm_ll - + 0.5 * psi_ll^2) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (vel_norm_rr) - 0.5 * mag_norm_rr - + 0.5 * psi_rr^2) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) - f4 = (help1/T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + f3 * v3_avg + - f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5*v2_mag_avg + - B2_avg * vel_dot_mag_avg - c_h * psi_B2_avg - - end + # Calculate fluxes depending on orientation with specific direction averages + help1 = zero(T_ll) + help2 = zero(T_rr) + if orientation == 1 + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + 
for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = help2 * v1_avg + enth / T + 0.5 * mag_norm_avg - B1_avg * B1_avg + f2 = help2 * v2_avg - B1_avg * B2_avg + f3 = help2 * v3_avg - B1_avg * B3_avg + f5 = c_h * psi_avg + f6 = v1_avg * B2_avg - v2_avg * B1_avg + f7 = v1_avg * B3_avg - v3_avg * B1_avg + f8 = c_h * B1_avg + # total energy flux is complicated and involves the previous eight components + psi_B1_avg = 0.5 * (B1_ll * psi_ll + B1_rr * psi_rr) + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + + f4 = (help1 / T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + + f2 * v2_avg + f3 * v3_avg + + f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg - c_h * psi_B1_avg + + else + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = help2 * v1_avg - B1_avg * B2_avg + f2 = help2 * v2_avg + enth / T + 0.5 * mag_norm_avg - B2_avg * B2_avg + f3 = help2 * v3_avg - B2_avg * B3_avg + f5 = v2_avg * B1_avg - v1_avg * B2_avg + f6 = c_h * psi_avg + f7 = v2_avg * B3_avg - v3_avg * B2_avg + f8 = c_h * B2_avg + + # total energy flux is complicated and involves the previous eight components + psi_B2_avg = 0.5 * (B2_ll * psi_ll + B2_rr * psi_rr) + v2_mag_avg = 0.5 * (v2_ll * mag_norm_ll + v2_rr * mag_norm_rr) + + f4 = (help1 / T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + + f2 * v2_avg + f3 * v3_avg + + f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5 * v2_mag_avg + + B2_avg * vel_dot_mag_avg - c_h * psi_B2_avg + end - f_other = SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8) + f_other = SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdMulticomponentEquations2D) @@ -375,277 +414,301 @@ Hindenlang (2019), extending [`flux_ranocha`](@ref) to the MHD equations. 
the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - # Unpack left and right states - v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Compute the necessary mean values needed for either direction - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) - - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+8], u_rr[i+8]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+8] + u_rr[i+8]) for i in eachcomponent(equations)) - - - if orientation == 1 - f1 = zero(rho_ll) - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - f1 += f_rho[i] +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations2D) + # Unpack left and right states + v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) + v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Compute the necessary mean values needed for either direction + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 8], + u_rr[i + 8]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 8] + + u_rr[i + 8]) + for i in eachcomponent(equations)) + + if orientation == 1 + f1 = zero(rho_ll) + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f1 += f_rho[i] + end + + # Calculate fluxes depending on orientation with specific direction averages + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll 
* B1_rr + B1_rr * B1_ll)
+        f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll)
+        f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll)
+        #f5 below
+        f6 = equations.c_h * psi_avg
+        f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr)
+        f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr)
+        f9 = equations.c_h * 0.5 * (B1_ll + B1_rr)
+        # total energy flux is complicated and involves the previous components
+        f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one)
+              +
+              0.5 * (+p_ll * v1_rr + p_rr * v1_ll
+               + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll)
+               + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll)
+               -
+               (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll)
+               -
+               (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll)
+               +
+               equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll)))
+    else
+        f1 = zero(rho_ll)
+        f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg
+                                                                 for i in eachcomponent(equations))
+        for i in eachcomponent(equations)
+            f1 += f_rho[i]
+        end
+
+        # Calculate fluxes depending on orientation with specific direction averages
+        f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll)
+        f3 = f1 * v2_avg + p_avg + magnetic_square_avg -
+             0.5 * (B2_ll * B2_rr + B2_rr * B2_ll)
+        f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll)
+        #f5 below
+        f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr)
+        f7 = equations.c_h * psi_avg
+        f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr)
+        f9 = equations.c_h * 0.5 * (B2_ll + B2_rr)
+        # total energy flux is complicated and involves the previous components
+        f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one)
+              +
+              0.5 * (+p_ll * v2_rr + p_rr * v2_ll
+               + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll)
+               + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll)
+               -
+               (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll)
+               -
+               (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll)
+               +
+               equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll)))
    end
-    # Calculate fluxes depending on orientation with specific direction averages
-    f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll)
-    f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll)
-    f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll)
-    #f5 below
-    f6 = f6 = equations.c_h * psi_avg
-    f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr)
-    f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr)
-    f9 = equations.c_h * 0.5 * (B1_ll + B1_rr)
-    # total energy flux is complicated and involves the previous components
-    f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one )
-         + 0.5 * (
-           + p_ll * v1_rr + p_rr * v1_ll
-           + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll)
-           + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll)
-           - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll)
-           - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll)
-           + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll) ) )
-  else
-    f1 = zero(rho_ll)
-    f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v2_avg for i in eachcomponent(equations))
-    for i in eachcomponent(equations)
-      f1 += f_rho[i]
-    end
-
-    # Calculate fluxes depending on orientation with specific direction averages
-    f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll)
-    f3 = f1 * v2_avg + p_avg + magnetic_square_avg - 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll)
-    f4 = f1 * 
v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) - #f5 below - f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) - f7 = equations.c_h * psi_avg - f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) - f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v2_rr + p_rr * v2_ll - + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) - + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) - - (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) - - (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) - + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll) ) ) - end - - f_other = SVector{8, real(equations)}(f2, f3, f4, f5, f6, f7, f8, f9) - - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1_ll, rho_v2_ll, _ = u_ll - rho_v1_rr, rho_v2_rr, _ = u_rr - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculate velocities and fast magnetoacoustic wave speeds - if orientation == 1 - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - else # orientation == 2 - v_ll = rho_v2_ll / rho_ll - v_rr = rho_v2_rr / rho_rr - end - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) -end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations2D) + rho_v1_ll, rho_v2_ll, _ = u_ll + rho_v1_rr, rho_v2_rr, _ = u_rr + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculate velocities and fast magnetoacoustic wave speeds + if orientation == 1 + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + else # orientation == 2 + v_ll = rho_v2_ll / rho_ll + v_rr = rho_v2_rr / rho_rr + end + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +end @inline function max_abs_speeds(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, _ = u + rho_v1, rho_v2, _ = u - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho + v1 = rho_v1 / rho + v2 = rho_v2 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) - cf_y_direction = calc_fast_wavespeed(u, 2, equations) + cf_x_direction = calc_fast_wavespeed(u, 1, equations) + cf_y_direction = calc_fast_wavespeed(u, 2, equations) - return (abs(v1) + cf_x_direction, abs(v2) + cf_y_direction, ) + return (abs(v1) + cf_x_direction, abs(v2) + cf_y_direction) end - @inline function density_pressure(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - rho = density(u, equations) - gamma = totalgamma(u, equations) - p = (gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return rho * p + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + rho = density(u, equations) + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return rho * p end - 
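As a usage sketch tying the converted functions above together (the component values,
the state, and the manually assigned GLM speed are illustrative assumptions, not taken
from this patch): construct a two-component system, build a conservative state via
`prim2cons`, and evaluate the physical flux and the local wave speed estimates:

    using Trixi: Trixi, IdealGlmMhdMulticomponentEquations2D, prim2cons, flux
    using StaticArrays: SVector

    equations = IdealGlmMhdMulticomponentEquations2D(gammas = (2.0, 4.0),
                                                     gas_constants = (2.0, 2.0))
    # the GLM cleaning speed is NaN after construction and is normally set by a
    # callback; assign it by hand here so the flux below is finite
    equations.c_h = 1.0

    # primitive state: v1, v2, v3, p, B1, B2, B3, psi, then one density per component
    prim = SVector(0.1, 0.2, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.9, 0.1)
    u = prim2cons(prim, equations)

    f_x = flux(u, 1, equations)                  # physical flux in x-direction (orientation 1)
    speeds = Trixi.max_abs_speeds(u, equations)  # (|v1| + c_f in x, |v2| + c_f in y)

    # consistency: a two-point flux evaluated at identical left/right states
    # reduces (up to roundoff) to the physical flux
    @assert Trixi.flux_hindenlang_gassner(u, u, 1, equations) ≈ f_x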
# Convert conservative variables to primitive function cons2prim(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+8] for i in eachcomponent(equations)) - rho = density(u, equations) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 8] + for i in eachcomponent(equations)) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho - gamma = totalgamma(u, equations) + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*(v1^2 + v2^2 + v3^2) - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) - return vcat(prim_other, prim_rho) + p = (gamma - 1) * + (rho_e - 0.5 * rho * (v1^2 + v2^2 + v3^2) - 0.5 * (B1^2 + B2^2 + B3^2) - + 0.5 * psi^2) + prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) + return vcat(prim_other, prim_rho) end # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - @unpack cv, gammas, gas_constants = equations + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + @unpack cv, gammas, gas_constants = equations - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - s = log(p) - gamma*log(rho) - rho_p = rho / p + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + gamma = totalgamma(u, equations) + p = (gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + s = log(p) - gamma * log(rho) + rho_p = rho / p - # Multicomponent stuff - help1 = zero(v1) + # Multicomponent stuff + help1 = zero(v1) - for i in eachcomponent(equations) - help1 += u[i+8] * cv[i] - end - - T = (rho_e - 0.5 * rho * v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) / (help1) - - entrop_rho = SVector{ncomponents(equations), real(equations)}( -1.0 * (cv[i] * log(T) - gas_constants[i] * log(u[i+8])) + gas_constants[i] + cv[i] - (v_square / (2*T)) for i in eachcomponent(equations)) - - w1 = v1 / T - w2 = v2 / T - w3 = v3 / T - w4 = -1.0 / T - w5 = B1 / T - w6 = B2 / T - w7 = B3 / T - w8 = psi / T - - entrop_other = SVector{8, real(equations)}(w1, w2, w3, w4, w5, w6, w7, w8) + for i in eachcomponent(equations) + help1 += u[i + 8] * cv[i] + end - return vcat(entrop_other, entrop_rho) + T = (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) / + (help1) + + entrop_rho = SVector{ncomponents(equations), real(equations)}(-1.0 * + (cv[i] * log(T) - + gas_constants[i] * + log(u[i + 8])) + + gas_constants[i] + + cv[i] - + (v_square / (2 * T)) + for i in eachcomponent(equations)) + + w1 = v1 / T + w2 = v2 / T + w3 = v3 / T + w4 = -1.0 / T + w5 = B1 / T + w6 = B2 / T + w7 = B3 / T + w8 = psi / T + + entrop_other = SVector{8, real(equations)}(w1, w2, w3, w4, w5, w6, w7, w8) + + return vcat(entrop_other, entrop_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdMulticomponentEquations2D) - v1, v2, v3, p, B1, B2, B3, psi = prim + v1, v2, v3, p, B1, B2, B3, 
psi = prim - cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i+8] for i in eachcomponent(equations)) - rho = density(prim, equations) + cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i + 8] + for i in eachcomponent(equations)) + rho = density(prim, equations) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 - gamma = totalgamma(prim, equations) - rho_e = p/(gamma-1) + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 + gamma = totalgamma(prim, equations) + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 - cons_other = SVector{8, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + cons_other = SVector{8, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, + psi) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, direction, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - rho = density(cons, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_mag = sqrt(v1^2 + v2^2 + v3^2) - gamma = totalgamma(cons, equations) - p = (gamma - 1)*(rho_e - 0.5*rho*v_mag^2 - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - a_square = gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1^2 + b2^2 + b3^2 - if direction == 1 # x-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - else - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b2^2)) - end - return c_f +@inline function calc_fast_wavespeed(cons, direction, + equations::IdealGlmMhdMulticomponentEquations2D) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + rho = density(cons, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_mag = sqrt(v1^2 + v2^2 + v3^2) + gamma = totalgamma(cons, equations) + p = (gamma - 1) * + (rho_e - 0.5 * rho * v_mag^2 - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + a_square = gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1^2 + b2^2 + b3^2 + if direction == 1 # x-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + else + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b2^2)) + end + return c_f end - @inline function density(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho = zero(u[1]) - - for i in eachcomponent(equations) - rho += u[i+8] - end + rho = zero(u[1]) - return rho - end + for i in eachcomponent(equations) + rho += u[i + 8] + end + return rho +end - @inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations2D) - @unpack cv, gammas = equations +@inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations2D) + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+8] * cv[i] * gammas[i] - help2 += u[i+8] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 8] * cv[i] * gammas[i] + help2 += u[i + 8] * cv[i] + end - return 
help1/help2 + return help1 / help2 end - @inline function densities(u, v, equations::IdealGlmMhdMulticomponentEquations2D) - - return SVector{ncomponents(equations), real(equations)}(u[i+8]*v for i in eachcomponent(equations)) - end - - + return SVector{ncomponents(equations), real(equations)}(u[i + 8] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/inviscid_burgers_1d.jl b/src/equations/inviscid_burgers_1d.jl index 18e2ed4600b..8d4410b6ffe 100644 --- a/src/equations/inviscid_burgers_1d.jl +++ b/src/equations/inviscid_burgers_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" InviscidBurgersEquation1D @@ -16,10 +16,8 @@ in one space dimension. """ struct InviscidBurgersEquation1D <: AbstractInviscidBurgersEquation{1, 1} end - -varnames(::typeof(cons2cons), ::InviscidBurgersEquation1D) = ("scalar", ) -varnames(::typeof(cons2prim), ::InviscidBurgersEquation1D) = ("scalar", ) - +varnames(::typeof(cons2cons), ::InviscidBurgersEquation1D) = ("scalar",) +varnames(::typeof(cons2prim), ::InviscidBurgersEquation1D) = ("scalar",) # Set initial conditions at physical location `x` for time `t` """ @@ -28,109 +26,104 @@ varnames(::typeof(cons2prim), ::InviscidBurgersEquation1D) = ("scalar", ) A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equation::InviscidBurgersEquation1D) - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::InviscidBurgersEquation1D) A smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equation::InviscidBurgersEquation1D) - c = 2.0 - A = 1.0 - L = 1 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * (x[1] - t)) - - return SVector(scalar) + c = 2.0 + A = 1.0 + L = 1 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * (x[1] - t)) + + return SVector(scalar) end - """ source_terms_convergence_test(u, x, t, equations::InviscidBurgersEquation1D) Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). 
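# As a quick standalone sanity check (plain Julia, independent of Trixi.jl; c, A
# and omega mirror `initial_condition_convergence_test` above, while the sample
# point and step size are illustrative), the source term can be verified against
# the manufactured solution u(x, t) = c + A * sin(omega * (x - t)) by comparing
# finite differences of u_t + u * u_x with the closed form below:
let c = 2.0, A = 1.0, omega = 2 * pi, x = 0.3, t = 0.1, h = 1.0e-6
    u(x, t) = c + A * sin(omega * (x - t))
    s(x, t) = omega * A * cos(omega * (x - t)) * (c - 1 + A * sin(omega * (x - t)))
    u_t = (u(x, t + h) - u(x, t - h)) / (2 * h)  # central difference in time
    u_x = (u(x + h, t) - u(x - h, t)) / (2 * h)  # central difference in space
    @assert isapprox(u_t + u(x, t) * u_x, s(x, t); atol = 1.0e-6)
end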
""" -@inline function source_terms_convergence_test(u, x, t, equations::InviscidBurgersEquation1D) - # Same settings as in `initial_condition` - c = 2.0 - A = 1.0 - L = 1 - f = 1/L - omega = 2 * pi * f - du = omega * A * cos(omega * (x[1] - t)) * (c - 1 + A * sin(omega * (x[1] - t))) - - return SVector(du) +@inline function source_terms_convergence_test(u, x, t, + equations::InviscidBurgersEquation1D) + # Same settings as in `initial_condition` + c = 2.0 + A = 1.0 + L = 1 + f = 1 / L + omega = 2 * pi * f + du = omega * A * cos(omega * (x[1] - t)) * (c - 1 + A * sin(omega * (x[1] - t))) + + return SVector(du) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::InviscidBurgersEquation1D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equation::InviscidBurgersEquation1D) - return SVector(0.5 * u[1]^2) + return SVector(0.5 * u[1]^2) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::InviscidBurgersEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] - λ_max = max(abs(u_L), abs(u_R)) + λ_max = max(abs(u_L), abs(u_R)) end # Calculate minimum and maximum wave speeds for HLL-type fluxes -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::InviscidBurgersEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] - λ_min = min(u_L, u_R) - λ_max = max(u_L, u_R) + λ_min = min(u_L, u_R) + λ_max = max(u_L, u_R) - return λ_min, λ_max + return λ_min, λ_max end @inline function max_abs_speeds(u, equation::InviscidBurgersEquation1D) - return (abs(u[1]),) + return (abs(u[1]),) end - # (Symmetric) Entropy Conserving flux function flux_ec(u_ll, u_rr, orientation, equation::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] + u_L = u_ll[1] + u_R = u_rr[1] - return SVector((u_L^2 + u_L * u_R + u_R^2) / 6) + return SVector((u_L^2 + u_L * u_R + u_R^2) / 6) end - # See https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf , # section 4.1.5 and especially equation (4.16). function flux_godunov(u_ll, u_rr, orientation, equation::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] + u_L = u_ll[1] + u_R = u_rr[1] - return SVector(0.5 * max(max(u_L, zero(u_L))^2, min(u_R, zero(u_R))^2)) + return SVector(0.5 * max(max(u_L, zero(u_L))^2, min(u_R, zero(u_R))^2)) end - # See https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf , # section 4.2.5 and especially equation (4.34). -function flux_engquist_osher(u_ll, u_rr, orientation, equation::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] +function flux_engquist_osher(u_ll, u_rr, orientation, + equation::InviscidBurgersEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] - return SVector(0.5 * (max(u_L, zero(u_L))^2 + min(u_R, zero(u_R))^2)) + return SVector(0.5 * (max(u_L, zero(u_L))^2 + min(u_R, zero(u_R))^2)) end - """ splitting_lax_friedrichs(u, orientation::Integer, equations::InviscidBurgersEquation1D) @@ -151,41 +144,38 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
""" @inline function splitting_lax_friedrichs(u, orientation::Integer, equations::InviscidBurgersEquation1D) - fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) - fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) + fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_lax_friedrichs(u, ::Val{:plus}, orientation::Integer, equations::InviscidBurgersEquation1D) - f = 0.5 * u[1]^2 - lambda = abs(u[1]) - return SVector(0.5 * (f + lambda * u[1])) + f = 0.5 * u[1]^2 + lambda = abs(u[1]) + return SVector(0.5 * (f + lambda * u[1])) end @inline function splitting_lax_friedrichs(u, ::Val{:minus}, orientation::Integer, equations::InviscidBurgersEquation1D) - f = 0.5 * u[1]^2 - lambda = abs(u[1]) - return SVector(0.5 * (f - lambda * u[1])) + f = 0.5 * u[1]^2 + lambda = abs(u[1]) + return SVector(0.5 * (f - lambda * u[1])) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::InviscidBurgersEquation1D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::InviscidBurgersEquation1D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::InviscidBurgersEquation1D) = 0.5 * u^2 @inline entropy(u, equation::InviscidBurgersEquation1D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::InviscidBurgersEquation1D) = 0.5 * u^2 -@inline energy_total(u, equation::InviscidBurgersEquation1D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::InviscidBurgersEquation1D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/laplace_diffusion_1d.jl b/src/equations/laplace_diffusion_1d.jl index 2573a3d0d04..815b9908c1e 100644 --- a/src/equations/laplace_diffusion_1d.jl +++ b/src/equations/laplace_diffusion_1d.jl @@ -5,44 +5,54 @@ with diffusivity ``\kappa`` applied to each solution component defined by `equations`. 
""" struct LaplaceDiffusion1D{E, N, T} <: AbstractLaplaceDiffusion{1, N} - diffusivity::T - equations_hyperbolic::E + diffusivity::T + equations_hyperbolic::E end -LaplaceDiffusion1D(diffusivity, equations_hyperbolic) = - LaplaceDiffusion1D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), typeof(diffusivity)}(diffusivity, equations_hyperbolic) +function LaplaceDiffusion1D(diffusivity, equations_hyperbolic) + LaplaceDiffusion1D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), + typeof(diffusivity)}(diffusivity, equations_hyperbolic) +end -varnames(variable_mapping, equations_parabolic::LaplaceDiffusion1D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, equations_parabolic::LaplaceDiffusion1D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end function flux(u, gradients, orientation::Integer, equations_parabolic::LaplaceDiffusion1D) - dudx = gradients - # orientation == 1 - return equations_parabolic.diffusivity * dudx + dudx = gradients + # orientation == 1 + return equations_parabolic.diffusivity * dudx end - # Dirichlet-type boundary condition for use with a parabolic solver in weak form -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion1D) - return boundary_condition.boundary_value_function(x, t, equations_parabolic) + return boundary_condition.boundary_value_function(x, t, equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion1D) - return flux_inner + return flux_inner end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion1D) - return boundary_condition.boundary_normal_flux_function(x, t, equations_parabolic) + return boundary_condition.boundary_normal_flux_function(x, t, equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion1D) - return flux_inner + return flux_inner end diff --git a/src/equations/laplace_diffusion_2d.jl b/src/equations/laplace_diffusion_2d.jl index 3963c616af2..3443e9c097b 100644 --- a/src/equations/laplace_diffusion_2d.jl +++ b/src/equations/laplace_diffusion_2d.jl @@ -5,54 +5,66 @@ with diffusivity ``\kappa`` applied to each solution component defined by `equations`. 
""" struct LaplaceDiffusion2D{E, N, T} <: AbstractLaplaceDiffusion{2, N} - diffusivity::T - equations_hyperbolic::E + diffusivity::T + equations_hyperbolic::E end -LaplaceDiffusion2D(diffusivity, equations_hyperbolic) = - LaplaceDiffusion2D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), typeof(diffusivity)}(diffusivity, equations_hyperbolic) +function LaplaceDiffusion2D(diffusivity, equations_hyperbolic) + LaplaceDiffusion2D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), + typeof(diffusivity)}(diffusivity, equations_hyperbolic) +end -varnames(variable_mapping, equations_parabolic::LaplaceDiffusion2D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, equations_parabolic::LaplaceDiffusion2D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end # no orientation specified since the flux is vector-valued function flux(u, gradients, orientation::Integer, equations_parabolic::LaplaceDiffusion2D) - dudx, dudy = gradients - if orientation == 1 - return SVector(equations_parabolic.diffusivity * dudx) - else # if orientation == 2 - return SVector(equations_parabolic.diffusivity * dudy) - end + dudx, dudy = gradients + if orientation == 1 + return SVector(equations_parabolic.diffusivity * dudx) + else # if orientation == 2 + return SVector(equations_parabolic.diffusivity * dudy) + end end # TODO: parabolic; should this remain in the equations file, be moved to solvers, or live in the elixir? # The penalization depends on the solver, but also depends explicitly on physical parameters, # and would probably need to be specialized for every different equation. -function penalty(u_outer, u_inner, inv_h, equations_parabolic::LaplaceDiffusion2D, dg::ViscousFormulationLocalDG) - return dg.penalty_parameter * (u_outer - u_inner) * equations_parabolic.diffusivity +function penalty(u_outer, u_inner, inv_h, equations_parabolic::LaplaceDiffusion2D, + dg::ViscousFormulationLocalDG) + return dg.penalty_parameter * (u_outer - u_inner) * equations_parabolic.diffusivity end # Dirichlet-type boundary condition for use with a parabolic solver in weak form -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion2D) - return boundary_condition.boundary_value_function(x, t, equations_parabolic) + return boundary_condition.boundary_value_function(x, t, equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion2D) - return flux_inner + return flux_inner end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion2D) - return boundary_condition.boundary_normal_flux_function(x, t, equations_parabolic) + return boundary_condition.boundary_normal_flux_function(x, t, 
equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion2D) - return flux_inner + return flux_inner end diff --git a/src/equations/lattice_boltzmann_2d.jl b/src/equations/lattice_boltzmann_2d.jl index ee64ae591ae..272dd897ce3 100644 --- a/src/equations/lattice_boltzmann_2d.jl +++ b/src/equations/lattice_boltzmann_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LatticeBoltzmannEquations2D(; Ma, Re, collision_op=collision_bgk, @@ -61,85 +61,90 @@ The main sources for the base implementation were 4. Dieter Krüger et al., **The Lattice Boltzmann Method**, Springer International Publishing, 2017 [doi:10.1007/978-3-319-44649-3](https://doi.org/10.1007/978-3-319-44649-3) """ -struct LatticeBoltzmannEquations2D{RealT<:Real, CollisionOp} <: AbstractLatticeBoltzmannEquations{2, 9} - c::RealT # mean thermal molecular velocity - c_s::RealT # isothermal speed of sound - rho0::RealT # macroscopic reference density +struct LatticeBoltzmannEquations2D{RealT <: Real, CollisionOp} <: + AbstractLatticeBoltzmannEquations{2, 9} + c::RealT # mean thermal molecular velocity + c_s::RealT # isothermal speed of sound + rho0::RealT # macroscopic reference density - Ma::RealT # characteristic Mach number - u0::RealT # macroscopic reference velocity + Ma::RealT # characteristic Mach number + u0::RealT # macroscopic reference velocity - Re::RealT # characteristic Reynolds number - L::RealT # reference length - nu::RealT # kinematic viscosity + Re::RealT # characteristic Reynolds number + L::RealT # reference length + nu::RealT # kinematic viscosity - weights::SVector{9, RealT} # weighting factors for the equilibrium distribution - v_alpha1::SVector{9, RealT} # discrete molecular velocity components in x-direction - v_alpha2::SVector{9, RealT} # discrete molecular velocity components in y-direction + weights::SVector{9, RealT} # weighting factors for the equilibrium distribution + v_alpha1::SVector{9, RealT} # discrete molecular velocity components in x-direction + v_alpha2::SVector{9, RealT} # discrete molecular velocity components in y-direction - collision_op::CollisionOp # collision operator for the collision kernel + collision_op::CollisionOp # collision operator for the collision kernel end -function LatticeBoltzmannEquations2D(; Ma, Re, collision_op=collision_bgk, - c=1, L=1, rho0=1, u0=nothing, nu=nothing) - # Sanity check that exactly one of Ma, u0 is not `nothing` - if isnothing(Ma) && isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") - elseif !isnothing(Ma) && !isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be set") - end - - # Sanity check that exactly one of Re, nu is not `nothing` - if isnothing(Re) && isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be `nothing`") - elseif !isnothing(Re) && !isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be set") - end - - # Calculate isothermal speed of sound - # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity - # `c` depends on the used phase space discretization, and is valid for 
D2Q9 (and others). For - # details, see, e.g., [3] in the docstring above. - c_s = c / sqrt(3) - - # Calculate missing quantities - if isnothing(Ma) - Ma = u0 / c_s - elseif isnothing(u0) - u0 = Ma * c_s - end - if isnothing(Re) - Re = u0 * L / nu - elseif isnothing(nu) - nu = u0 * L / Re - end - - # Promote to common data type - Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) - - # Source for weights and speeds: [4] in the docstring above - weights = SVector(1/9, 1/9, 1/9, 1/9, 1/36, 1/36, 1/36, 1/36, 4/9) - v_alpha1 = SVector( c, 0, -c, 0, c, -c, -c, c, 0 ) - v_alpha2 = SVector( 0, c, 0, -c, c, c, -c, -c, 0 ) - - LatticeBoltzmannEquations2D(c, c_s, rho0, Ma, u0, Re, L, nu, - weights, v_alpha1, v_alpha2, - collision_op) +function LatticeBoltzmannEquations2D(; Ma, Re, collision_op = collision_bgk, + c = 1, L = 1, rho0 = 1, u0 = nothing, nu = nothing) + # Sanity check that exactly one of Ma, u0 is not `nothing` + if isnothing(Ma) && isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") + elseif !isnothing(Ma) && !isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be set") + end + + # Sanity check that exactly one of Re, nu is not `nothing` + if isnothing(Re) && isnothing(nu) + error("Reynolds number `Re` and viscosity `nu` may not both be `nothing`") + elseif !isnothing(Re) && !isnothing(nu) + error("Reynolds number `Re` and viscosity `nu` may not both be set") + end + + # Calculate isothermal speed of sound + # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity + # `c` depends on the used phase space discretization, and is valid for D2Q9 (and others). For + # details, see, e.g., [3] in the docstring above. + c_s = c / sqrt(3) + + # Calculate missing quantities + if isnothing(Ma) + Ma = u0 / c_s + elseif isnothing(u0) + u0 = Ma * c_s + end + if isnothing(Re) + Re = u0 * L / nu + elseif isnothing(nu) + nu = u0 * L / Re + end + + # Promote to common data type + Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) + + # Source for weights and speeds: [4] in the docstring above + weights = SVector(1 / 9, 1 / 9, 1 / 9, 1 / 9, 1 / 36, 1 / 36, 1 / 36, 1 / 36, 4 / 9) + v_alpha1 = SVector(c, 0, -c, 0, c, -c, -c, c, 0) + v_alpha2 = SVector(0, c, 0, -c, c, c, -c, -c, 0) + + LatticeBoltzmannEquations2D(c, c_s, rho0, Ma, u0, Re, L, nu, + weights, v_alpha1, v_alpha2, + collision_op) end - -varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations2D) = ntuple(v -> "pdf"*string(v), nvariables(equations)) -varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations2D) = varnames(cons2cons, equations) - +function varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations2D) + ntuple(v -> "pdf" * string(v), nvariables(equations)) +end +function varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations2D) + varnames(cons2cons, equations) +end # Convert conservative variables to macroscopic @inline function cons2macroscopic(u, equations::LatticeBoltzmannEquations2D) - rho = density(u, equations) - v1, v2 = velocity(u, equations) - p = pressure(u, equations) - return SVector(rho, v1, v2, p) + rho = density(u, equations) + v1, v2 = velocity(u, equations) + p = pressure(u, equations) + return SVector(rho, v1, v2, p) +end +function varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations2D) + ("rho", "v1", "v2", "p") end -varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations2D) = ("rho", "v1", "v2", "p") # Set initial
conditions at physical location `x` for time `t` """ @@ -148,15 +153,14 @@ varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations2D) = ("rho", "v A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::LatticeBoltzmannEquations2D) - @unpack u0 = equations - rho = pi - v1 = u0 - v2 = u0 + @unpack u0 = equations + rho = pi + v1 = u0 + v2 = u0 - return equilibrium_distribution(rho, v1, v2, equations) + return equilibrium_distribution(rho, v1, v2, equations) end - """ boundary_condition_noslip_wall(u_inner, orientation, direction, x, t, surface_flux_function, @@ -167,94 +171,91 @@ No-slip wall boundary condition using the bounce-back approach. @inline function boundary_condition_noslip_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::LatticeBoltzmannEquations2D) - # For LBM no-slip wall boundary conditions, we set the boundary state to - # - the inner state for outgoing particle distribution functions - # - the *opposite* inner state for all other particle distribution functions - # See the list of (opposite) directions in the docstring of `LatticeBoltzmannEquations2D`. - if direction == 1 # boundary in -x direction - pdf1 = u_inner[3] - pdf2 = u_inner[4] - pdf3 = u_inner[3] # outgoing - pdf4 = u_inner[2] - pdf5 = u_inner[7] - pdf6 = u_inner[6] # outgoing - pdf7 = u_inner[7] # outgoing - pdf8 = u_inner[6] - pdf9 = u_inner[9] - elseif direction == 2 # boundary in +x direction - pdf1 = u_inner[1] # outgoing - pdf2 = u_inner[4] - pdf3 = u_inner[1] - pdf4 = u_inner[2] - pdf5 = u_inner[5] # outgoing - pdf6 = u_inner[8] - pdf7 = u_inner[5] - pdf8 = u_inner[8] # outgoing - pdf9 = u_inner[9] - elseif direction == 3 # boundary in -y direction - pdf1 = u_inner[3] - pdf2 = u_inner[4] - pdf3 = u_inner[1] - pdf4 = u_inner[4] # outgoing - pdf5 = u_inner[7] - pdf6 = u_inner[8] - pdf7 = u_inner[7] # outgoing - pdf8 = u_inner[8] # outgoing - pdf9 = u_inner[9] - else # boundary in +y direction - pdf1 = u_inner[3] - pdf2 = u_inner[2] # outgoing - pdf3 = u_inner[1] - pdf4 = u_inner[2] - pdf5 = u_inner[5] # outgoing - pdf6 = u_inner[6] # outgoing - pdf7 = u_inner[5] - pdf8 = u_inner[6] - pdf9 = u_inner[9] - end - u_boundary = SVector(pdf1, pdf2, pdf3, pdf4, pdf5, pdf6, pdf7, pdf8, pdf9) - - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end - - return flux + # For LBM no-slip wall boundary conditions, we set the boundary state to + # - the inner state for outgoing particle distribution functions + # - the *opposite* inner state for all other particle distribution functions + # See the list of (opposite) directions in the docstring of `LatticeBoltzmannEquations2D`. 
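# Illustration of the bounce-back pattern used below (D2Q9 numbering as in this
# file): the opposite-direction pairs are 1 <-> 3, 2 <-> 4, 5 <-> 7 and 6 <-> 8,
# while 9 (the rest particle) is its own opposite. At a -x boundary, for
# example, the incoming pdf1 is taken from the outgoing u_inner[3].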
+ if direction == 1 # boundary in -x direction + pdf1 = u_inner[3] + pdf2 = u_inner[4] + pdf3 = u_inner[3] # outgoing + pdf4 = u_inner[2] + pdf5 = u_inner[7] + pdf6 = u_inner[6] # outgoing + pdf7 = u_inner[7] # outgoing + pdf8 = u_inner[6] + pdf9 = u_inner[9] + elseif direction == 2 # boundary in +x direction + pdf1 = u_inner[1] # outgoing + pdf2 = u_inner[4] + pdf3 = u_inner[1] + pdf4 = u_inner[2] + pdf5 = u_inner[5] # outgoing + pdf6 = u_inner[8] + pdf7 = u_inner[5] + pdf8 = u_inner[8] # outgoing + pdf9 = u_inner[9] + elseif direction == 3 # boundary in -y direction + pdf1 = u_inner[3] + pdf2 = u_inner[4] + pdf3 = u_inner[1] + pdf4 = u_inner[4] # outgoing + pdf5 = u_inner[7] + pdf6 = u_inner[8] + pdf7 = u_inner[7] # outgoing + pdf8 = u_inner[8] # outgoing + pdf9 = u_inner[9] + else # boundary in +y direction + pdf1 = u_inner[3] + pdf2 = u_inner[2] # outgoing + pdf3 = u_inner[1] + pdf4 = u_inner[2] + pdf5 = u_inner[5] # outgoing + pdf6 = u_inner[6] # outgoing + pdf7 = u_inner[5] + pdf8 = u_inner[6] + pdf9 = u_inner[9] + end + u_boundary = SVector(pdf1, pdf2, pdf3, pdf4, pdf5, pdf6, pdf7, pdf8, pdf9) + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LatticeBoltzmannEquations2D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::LatticeBoltzmannEquations2D) - if orientation == 1 - v_alpha = equations.v_alpha1 - else - v_alpha = equations.v_alpha2 - end - return v_alpha .* u + if orientation == 1 + v_alpha = equations.v_alpha1 + else + v_alpha = equations.v_alpha2 + end + return v_alpha .* u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation # @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations2D) # λ_max = # end -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations2D) - if orientation == 1 - v_alpha = equations.v_alpha1 - else - v_alpha = equations.v_alpha2 - end - return 0.5 * ( v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll) ) +@inline function flux_godunov(u_ll, u_rr, orientation::Integer, + equations::LatticeBoltzmannEquations2D) + if orientation == 1 + v_alpha = equations.v_alpha1 + else + v_alpha = equations.v_alpha2 + end + return 0.5 * (v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll)) end - """ density(p::Real, equations::LatticeBoltzmannEquations2D) density(u, equations::LatticeBoltzmannEquations2D) @@ -264,38 +265,36 @@ Calculate the macroscopic density from the pressure `p` or the particle distribu @inline density(p::Real, equations::LatticeBoltzmannEquations2D) = p / equations.c_s^2 @inline density(u, equations::LatticeBoltzmannEquations2D) = sum(u) - """ velocity(u, orientation, equations::LatticeBoltzmannEquations2D) Calculate the macroscopic velocity for the given `orientation` (1 -> x, 2 -> y) from the particle distribution functions `u`. 
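# Density and velocity are the zeroth and first discrete moments of the
# particle distribution functions: rho = sum(u) and rho * v_i = dot(v_alpha_i, u),
# which is exactly what `density` above and `velocity` below compute.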
""" -@inline function velocity(u, orientation::Integer, equations::LatticeBoltzmannEquations2D) - if orientation == 1 - v_alpha = equations.v_alpha1 - else - v_alpha = equations.v_alpha2 - end - - return dot(v_alpha, u)/density(u, equations) +@inline function velocity(u, orientation::Integer, + equations::LatticeBoltzmannEquations2D) + if orientation == 1 + v_alpha = equations.v_alpha1 + else + v_alpha = equations.v_alpha2 + end + + return dot(v_alpha, u) / density(u, equations) end - """ velocity(u, equations::LatticeBoltzmannEquations2D) Calculate the macroscopic velocity vector from the particle distribution functions `u`. """ @inline function velocity(u, equations::LatticeBoltzmannEquations2D) - @unpack v_alpha1, v_alpha2 = equations - rho = density(u, equations) + @unpack v_alpha1, v_alpha2 = equations + rho = density(u, equations) - return SVector(dot(v_alpha1, u)/rho, - dot(v_alpha2, u)/rho) + return SVector(dot(v_alpha1, u) / rho, + dot(v_alpha2, u) / rho) end - """ pressure(rho::Real, equations::LatticeBoltzmannEquations2D) pressure(u, equations::LatticeBoltzmannEquations2D) @@ -303,9 +302,12 @@ end Calculate the macroscopic pressure from the density `rho` or the particle distribution functions `u`. """ -@inline pressure(rho::Real, equations::LatticeBoltzmannEquations2D) = rho * equations.c_s^2 -@inline pressure(u, equations::LatticeBoltzmannEquations2D) = pressure(density(u, equations), equations) - +@inline function pressure(rho::Real, equations::LatticeBoltzmannEquations2D) + rho * equations.c_s^2 +end +@inline function pressure(u, equations::LatticeBoltzmannEquations2D) + pressure(density(u, equations), equations) +end """ equilibrium_distribution(alpha, rho, v1, v2, equations::LatticeBoltzmannEquations2D) @@ -313,67 +315,63 @@ Calculate the macroscopic pressure from the density `rho` or the particle distr Calculate the local equilibrium distribution for the distribution function with index `alpha` and given the macroscopic state defined by `rho`, `v1`, `v2`. 
""" -@inline function equilibrium_distribution(alpha, rho, v1, v2, equations::LatticeBoltzmannEquations2D) - @unpack weights, c_s, v_alpha1, v_alpha2 = equations - - va_v = v_alpha1[alpha]*v1 + v_alpha2[alpha]*v2 - cs_squared = c_s^2 - v_squared = v1^2 + v2^2 - - return weights[alpha] * rho * (1 + va_v/cs_squared - + va_v^2/(2*cs_squared^2) - - v_squared/(2*cs_squared)) +@inline function equilibrium_distribution(alpha, rho, v1, v2, + equations::LatticeBoltzmannEquations2D) + @unpack weights, c_s, v_alpha1, v_alpha2 = equations + + va_v = v_alpha1[alpha] * v1 + v_alpha2[alpha] * v2 + cs_squared = c_s^2 + v_squared = v1^2 + v2^2 + + return weights[alpha] * rho * + (1 + va_v / cs_squared + + va_v^2 / (2 * cs_squared^2) + - + v_squared / (2 * cs_squared)) end - -@inline function equilibrium_distribution(rho, v1, v2, equations::LatticeBoltzmannEquations2D) - return SVector(equilibrium_distribution(1, rho, v1, v2, equations), - equilibrium_distribution(2, rho, v1, v2, equations), - equilibrium_distribution(3, rho, v1, v2, equations), - equilibrium_distribution(4, rho, v1, v2, equations), - equilibrium_distribution(5, rho, v1, v2, equations), - equilibrium_distribution(6, rho, v1, v2, equations), - equilibrium_distribution(7, rho, v1, v2, equations), - equilibrium_distribution(8, rho, v1, v2, equations), - equilibrium_distribution(9, rho, v1, v2, equations)) +@inline function equilibrium_distribution(rho, v1, v2, + equations::LatticeBoltzmannEquations2D) + return SVector(equilibrium_distribution(1, rho, v1, v2, equations), + equilibrium_distribution(2, rho, v1, v2, equations), + equilibrium_distribution(3, rho, v1, v2, equations), + equilibrium_distribution(4, rho, v1, v2, equations), + equilibrium_distribution(5, rho, v1, v2, equations), + equilibrium_distribution(6, rho, v1, v2, equations), + equilibrium_distribution(7, rho, v1, v2, equations), + equilibrium_distribution(8, rho, v1, v2, equations), + equilibrium_distribution(9, rho, v1, v2, equations)) end - function equilibrium_distribution(u, equations::LatticeBoltzmannEquations2D) - rho = density(u, equations) - v1, v2 = velocity(u, equations) + rho = density(u, equations) + v1, v2 = velocity(u, equations) - return equilibrium_distribution(rho, v1, v2, equations) + return equilibrium_distribution(rho, v1, v2, equations) end - """ collision_bgk(u, dt, equations::LatticeBoltzmannEquations2D) Collision operator for the Bhatnagar, Gross, and Krook (BGK) model. """ @inline function collision_bgk(u, dt, equations::LatticeBoltzmannEquations2D) - @unpack c_s, nu = equations - tau = nu / (c_s^2 * dt) - return -(u - equilibrium_distribution(u, equations))/(tau + 1/2) + @unpack c_s, nu = equations + tau = nu / (c_s^2 * dt) + return -(u - equilibrium_distribution(u, equations)) / (tau + 1 / 2) end - - @inline have_constant_speed(::LatticeBoltzmannEquations2D) = True() @inline function max_abs_speeds(equations::LatticeBoltzmannEquations2D) - @unpack c = equations + @unpack c = equations - return c, c + return c, c end - # Convert conservative variables to primitive @inline cons2prim(u, equations::LatticeBoltzmannEquations2D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equations::LatticeBoltzmannEquations2D) = u - - end # @muladd diff --git a/src/equations/lattice_boltzmann_3d.jl b/src/equations/lattice_boltzmann_3d.jl index 2e51af2245b..d3eada15f56 100644 --- a/src/equations/lattice_boltzmann_3d.jl +++ b/src/equations/lattice_boltzmann_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LatticeBoltzmannEquations3D(; Ma, Re, collision_op=collision_bgk, @@ -100,96 +100,103 @@ The main sources for the base implementation were 4. Dieter Krüger et al., **The Lattice Boltzmann Method**, Springer International Publishing, 2017 [doi:10.1007/978-3-319-44649-3](https://doi.org/10.1007/978-3-319-44649-3) """ -struct LatticeBoltzmannEquations3D{RealT<:Real, CollisionOp} <: AbstractLatticeBoltzmannEquations{3, 27} - c::RealT # mean thermal molecular velocity - c_s::RealT # isothermal speed of sound - rho0::RealT # macroscopic reference density +struct LatticeBoltzmannEquations3D{RealT <: Real, CollisionOp} <: + AbstractLatticeBoltzmannEquations{3, 27} + c::RealT # mean thermal molecular velocity + c_s::RealT # isothermal speed of sound + rho0::RealT # macroscopic reference density - Ma::RealT # characteristic Mach number - u0::RealT # macroscopic reference velocity + Ma::RealT # characteristic Mach number + u0::RealT # macroscopic reference velocity - Re::RealT # characteristic Reynolds number - L::RealT # reference length - nu::RealT # kinematic viscosity + Re::RealT # characteristic Reynolds number + L::RealT # reference length + nu::RealT # kinematic viscosity - weights::SVector{27, RealT} # weighting factors for the equilibrium distribution - v_alpha1::SVector{27, RealT} # discrete molecular velocity components in x-direction - v_alpha2::SVector{27, RealT} # discrete molecular velocity components in y-direction - v_alpha3::SVector{27, RealT} # discrete molecular velocity components in z-direction + weights::SVector{27, RealT} # weighting factors for the equilibrium distribution + v_alpha1::SVector{27, RealT} # discrete molecular velocity components in x-direction + v_alpha2::SVector{27, RealT} # discrete molecular velocity components in y-direction + v_alpha3::SVector{27, RealT} # discrete molecular velocity components in z-direction - collision_op::CollisionOp # collision operator for the collision kernel + collision_op::CollisionOp # collision operator for the collision kernel end -function LatticeBoltzmannEquations3D(; Ma, Re, collision_op=collision_bgk, - c=1, L=1, rho0=1, u0=nothing, nu=nothing) - # Sanity check that exactly one of Ma, u0 is not `nothing` - if isnothing(Ma) && isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") - elseif !isnothing(Ma) && !isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be set") - end - - # Sanity check that exactly one of Re, nu is not `nothing` - if isnothing(Re) && isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be `nothing`") - elseif !isnothing(Re) && !isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be set") - end - - # Calculate isothermal speed of sound - # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity - # `c` depends on the used phase space discretization, and is valid for D3Q27 (and others). For - # details, see, e.g., [3] in the docstring above. 
- c_s = c / sqrt(3) - - # Calculate missing quantities - if isnothing(Ma) - Ma = u0 / c_s - elseif isnothing(u0) - u0 = Ma * c_s - end - if isnothing(Re) - Re = u0 * L / nu - elseif isnothing(nu) - nu = u0 * L / Re - end - - # Promote to common data type - Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) - - # Source for weights and speeds: [4] in docstring above - weights = SVector(2/27, 2/27, 2/27, 2/27, 2/27, 2/27, 1/54, 1/54, 1/54, - 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, - 1/216, 1/216, 1/216, 1/216, 1/216, 1/216, 1/216, 1/216, 8/27) - v_alpha1 = SVector( c, -c, 0, 0, 0, 0, c, -c, c, - -c, 0, 0, c, -c, c, -c, 0, 0, - c, -c, c, -c, c, -c, -c, c, 0) - v_alpha2 = SVector( 0, 0, c, -c, 0, 0, c, -c, 0, - 0, c, -c, -c, c, 0, 0, c, -c, - c, -c, c, -c, -c, c, c, -c, 0) - v_alpha3 = SVector( 0, 0, 0, 0, c, -c, 0, 0, c, - -c, c, -c, 0, 0, -c, c, -c, c, - c, -c, -c, c, c, -c, c, -c, 0) - - LatticeBoltzmannEquations3D(c, c_s, rho0, Ma, u0, Re, L, nu, - weights, v_alpha1, v_alpha2, v_alpha3, - collision_op) +function LatticeBoltzmannEquations3D(; Ma, Re, collision_op = collision_bgk, + c = 1, L = 1, rho0 = 1, u0 = nothing, nu = nothing) + # Sanity check that exactly one of Ma, u0 is not `nothing` + if isnothing(Ma) && isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") + elseif !isnothing(Ma) && !isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be set") + end + + # Sanity check that exactly one of Re, nu is not `nothing` + if isnothing(Re) && isnothing(nu) + error("Reynolds number `Re` and viscosity `nu` may not both be `nothing`") + elseif !isnothing(Re) && !isnothing(nu) + error("Reynolds number `Re` and viscosity `nu` may not both be set") + end + + # Calculate isothermal speed of sound + # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity + # `c` depends on the used phase space discretization, and is valid for D3Q27 (and others). For + # details, see, e.g., [3] in the docstring above.
+ c_s = c / sqrt(3) + + # Calculate missing quantities + if isnothing(Ma) + Ma = u0 / c_s + elseif isnothing(u0) + u0 = Ma * c_s + end + if isnothing(Re) + Re = u0 * L / nu + elseif isnothing(nu) + nu = u0 * L / Re + end + + # Promote to common data type + Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) + + # Source for weights and speeds: [4] in docstring above + weights = SVector(2 / 27, 2 / 27, 2 / 27, 2 / 27, 2 / 27, 2 / 27, 1 / 54, 1 / 54, + 1 / 54, + 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, + 1 / 54, + 1 / 216, 1 / 216, 1 / 216, 1 / 216, 1 / 216, 1 / 216, 1 / 216, + 1 / 216, 8 / 27) + v_alpha1 = SVector(c, -c, 0, 0, 0, 0, c, -c, c, + -c, 0, 0, c, -c, c, -c, 0, 0, + c, -c, c, -c, c, -c, -c, c, 0) + v_alpha2 = SVector(0, 0, c, -c, 0, 0, c, -c, 0, + 0, c, -c, -c, c, 0, 0, c, -c, + c, -c, c, -c, -c, c, c, -c, 0) + v_alpha3 = SVector(0, 0, 0, 0, c, -c, 0, 0, c, + -c, c, -c, 0, 0, -c, c, -c, c, + c, -c, -c, c, c, -c, c, -c, 0) + + LatticeBoltzmannEquations3D(c, c_s, rho0, Ma, u0, Re, L, nu, + weights, v_alpha1, v_alpha2, v_alpha3, + collision_op) end - -varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations3D) = ntuple(v -> "pdf"*string(v), Val(nvariables(equations))) -varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations3D) = varnames(cons2cons, equations) - +function varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations3D) + ntuple(v -> "pdf" * string(v), Val(nvariables(equations))) +end +function varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations3D) + varnames(cons2cons, equations) +end # Convert conservative variables to macroscopic @inline function cons2macroscopic(u, equations::LatticeBoltzmannEquations3D) - rho = density(u, equations) - v1, v2, v3 = velocity(u, equations) - p = pressure(u, equations) - return SVector(rho, v1, v2, v3, p) + rho = density(u, equations) + v1, v2, v3 = velocity(u, equations) + p = pressure(u, equations) + return SVector(rho, v1, v2, v3, p) +end +function varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations3D) + ("rho", "v1", "v2", "v3", "p") end -varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations3D) = ("rho", "v1", "v2", "v3", "p") - # Set initial conditions at physical location `x` for time `t` """ @@ -198,50 +205,47 @@ varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations3D) = ("rho", "v A constant initial condition to test free-stream preservation. 
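# Note: `rho = pi` in the free-stream test below is (presumably) just an
# arbitrary, non-special constant, chosen so that scaling errors cannot
# silently cancel the way they might for a round density like 1.0.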
""" function initial_condition_constant(x, t, equations::LatticeBoltzmannEquations3D) - @unpack u0 = equations - rho = pi - v1 = u0 - v2 = u0 - v3 = u0 + @unpack u0 = equations + rho = pi + v1 = u0 + v2 = u0 + v3 = u0 - return equilibrium_distribution(rho, v1, v2, v3, equations) + return equilibrium_distribution(rho, v1, v2, v3, equations) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LatticeBoltzmannEquations3D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::LatticeBoltzmannEquations3D) - if orientation == 1 # x-direction - v_alpha = equations.v_alpha1 - elseif orientation == 2 # y-direction - v_alpha = equations.v_alpha2 - else # z-direction - v_alpha = equations.v_alpha3 - end - return v_alpha .* u + if orientation == 1 # x-direction + v_alpha = equations.v_alpha1 + elseif orientation == 2 # y-direction + v_alpha = equations.v_alpha2 + else # z-direction + v_alpha = equations.v_alpha3 + end + return v_alpha .* u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation # @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations3D) # λ_max = # end -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations3D) - if orientation == 1 # x-direction - v_alpha = equations.v_alpha1 - elseif orientation == 2 # y-direction - v_alpha = equations.v_alpha2 - else # z-direction - v_alpha = equations.v_alpha3 - end - return 0.5 * ( v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll) ) +@inline function flux_godunov(u_ll, u_rr, orientation::Integer, + equations::LatticeBoltzmannEquations3D) + if orientation == 1 # x-direction + v_alpha = equations.v_alpha1 + elseif orientation == 2 # y-direction + v_alpha = equations.v_alpha2 + else # z-direction + v_alpha = equations.v_alpha3 + end + return 0.5 * (v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll)) end - """ density(p::Real, equations::LatticeBoltzmannEquations3D) density(u, equations::LatticeBoltzmannEquations3D) @@ -251,41 +255,39 @@ Calculate the macroscopic density from the pressure `p` or the particle distribu @inline density(p::Real, equations::LatticeBoltzmannEquations3D) = p / equations.c_s^2 @inline density(u, equations::LatticeBoltzmannEquations3D) = sum(u) - """ velocity(u, orientation, equations::LatticeBoltzmannEquations3D) Calculate the macroscopic velocity for the given `orientation` (1 -> x, 2 -> y, 3 -> z) from the particle distribution functions `u`. """ -@inline function velocity(u, orientation::Integer, equations::LatticeBoltzmannEquations3D) - if orientation == 1 # x-direction - v_alpha = equations.v_alpha1 - elseif orientation == 2 # y-direction - v_alpha = equations.v_alpha2 - else # z-direction - v_alpha = equations.v_alpha3 - end - - return dot(v_alpha, u) / density(u, equations) +@inline function velocity(u, orientation::Integer, + equations::LatticeBoltzmannEquations3D) + if orientation == 1 # x-direction + v_alpha = equations.v_alpha1 + elseif orientation == 2 # y-direction + v_alpha = equations.v_alpha2 + else # z-direction + v_alpha = equations.v_alpha3 + end + + return dot(v_alpha, u) / density(u, equations) end - """ velocity(u, equations::LatticeBoltzmannEquations3D) Calculate the macroscopic velocity vector from the particle distribution functions `u`. 
""" @inline function velocity(u, equations::LatticeBoltzmannEquations3D) - @unpack v_alpha1, v_alpha2, v_alpha3 = equations - rho = density(u, equations) + @unpack v_alpha1, v_alpha2, v_alpha3 = equations + rho = density(u, equations) - return SVector(dot(v_alpha1, u)/rho, - dot(v_alpha2, u)/rho, - dot(v_alpha3, u)/rho) + return SVector(dot(v_alpha1, u) / rho, + dot(v_alpha2, u) / rho, + dot(v_alpha3, u) / rho) end - """ pressure(rho::Real, equations::LatticeBoltzmannEquations3D) pressure(u, equations::LatticeBoltzmannEquations3D) @@ -293,9 +295,12 @@ end Calculate the macroscopic pressure from the density `rho` or the particle distribution functions `u`. """ -@inline pressure(rho::Real, equations::LatticeBoltzmannEquations3D) = rho * equations.c_s^2 -@inline pressure(u, equations::LatticeBoltzmannEquations3D) = pressure(density(u, equations), equations) - +@inline function pressure(rho::Real, equations::LatticeBoltzmannEquations3D) + rho * equations.c_s^2 +end +@inline function pressure(u, equations::LatticeBoltzmannEquations3D) + pressure(density(u, equations), equations) +end """ equilibrium_distribution(alpha, rho, v1, v2, v3, equations::LatticeBoltzmannEquations3D) @@ -303,99 +308,95 @@ Calculate the macroscopic pressure from the density `rho` or the particle distr Calculate the local equilibrium distribution for the distribution function with index `alpha` and given the macroscopic state defined by `rho`, `v1`, `v2`, `v3`. """ -@inline function equilibrium_distribution(alpha, rho, v1, v2, v3, equations::LatticeBoltzmannEquations3D) - @unpack weights, c_s, v_alpha1, v_alpha2, v_alpha3 = equations - - va_v = v_alpha1[alpha]*v1 + v_alpha2[alpha]*v2 + v_alpha3[alpha]*v3 - cs_squared = c_s^2 - v_squared = v1^2 + v2^2 + v3^2 - - return weights[alpha] * rho * (1 + va_v/cs_squared - + va_v^2/(2*cs_squared^2) - - v_squared/(2*cs_squared)) +@inline function equilibrium_distribution(alpha, rho, v1, v2, v3, + equations::LatticeBoltzmannEquations3D) + @unpack weights, c_s, v_alpha1, v_alpha2, v_alpha3 = equations + + va_v = v_alpha1[alpha] * v1 + v_alpha2[alpha] * v2 + v_alpha3[alpha] * v3 + cs_squared = c_s^2 + v_squared = v1^2 + v2^2 + v3^2 + + return weights[alpha] * rho * + (1 + va_v / cs_squared + + va_v^2 / (2 * cs_squared^2) + - + v_squared / (2 * cs_squared)) end - -@inline function equilibrium_distribution(rho, v1, v2, v3, equations::LatticeBoltzmannEquations3D) - return SVector(equilibrium_distribution( 1, rho, v1, v2, v3, equations), - equilibrium_distribution( 2, rho, v1, v2, v3, equations), - equilibrium_distribution( 3, rho, v1, v2, v3, equations), - equilibrium_distribution( 4, rho, v1, v2, v3, equations), - equilibrium_distribution( 5, rho, v1, v2, v3, equations), - equilibrium_distribution( 6, rho, v1, v2, v3, equations), - equilibrium_distribution( 7, rho, v1, v2, v3, equations), - equilibrium_distribution( 8, rho, v1, v2, v3, equations), - equilibrium_distribution( 9, rho, v1, v2, v3, equations), - equilibrium_distribution(10, rho, v1, v2, v3, equations), - equilibrium_distribution(11, rho, v1, v2, v3, equations), - equilibrium_distribution(12, rho, v1, v2, v3, equations), - equilibrium_distribution(13, rho, v1, v2, v3, equations), - equilibrium_distribution(14, rho, v1, v2, v3, equations), - equilibrium_distribution(15, rho, v1, v2, v3, equations), - equilibrium_distribution(16, rho, v1, v2, v3, equations), - equilibrium_distribution(17, rho, v1, v2, v3, equations), - equilibrium_distribution(18, rho, v1, v2, v3, equations), - equilibrium_distribution(19, rho, v1, v2, v3, 
equations), - equilibrium_distribution(20, rho, v1, v2, v3, equations), - equilibrium_distribution(21, rho, v1, v2, v3, equations), - equilibrium_distribution(22, rho, v1, v2, v3, equations), - equilibrium_distribution(23, rho, v1, v2, v3, equations), - equilibrium_distribution(24, rho, v1, v2, v3, equations), - equilibrium_distribution(25, rho, v1, v2, v3, equations), - equilibrium_distribution(26, rho, v1, v2, v3, equations), - equilibrium_distribution(27, rho, v1, v2, v3, equations)) +@inline function equilibrium_distribution(rho, v1, v2, v3, + equations::LatticeBoltzmannEquations3D) + return SVector(equilibrium_distribution(1, rho, v1, v2, v3, equations), + equilibrium_distribution(2, rho, v1, v2, v3, equations), + equilibrium_distribution(3, rho, v1, v2, v3, equations), + equilibrium_distribution(4, rho, v1, v2, v3, equations), + equilibrium_distribution(5, rho, v1, v2, v3, equations), + equilibrium_distribution(6, rho, v1, v2, v3, equations), + equilibrium_distribution(7, rho, v1, v2, v3, equations), + equilibrium_distribution(8, rho, v1, v2, v3, equations), + equilibrium_distribution(9, rho, v1, v2, v3, equations), + equilibrium_distribution(10, rho, v1, v2, v3, equations), + equilibrium_distribution(11, rho, v1, v2, v3, equations), + equilibrium_distribution(12, rho, v1, v2, v3, equations), + equilibrium_distribution(13, rho, v1, v2, v3, equations), + equilibrium_distribution(14, rho, v1, v2, v3, equations), + equilibrium_distribution(15, rho, v1, v2, v3, equations), + equilibrium_distribution(16, rho, v1, v2, v3, equations), + equilibrium_distribution(17, rho, v1, v2, v3, equations), + equilibrium_distribution(18, rho, v1, v2, v3, equations), + equilibrium_distribution(19, rho, v1, v2, v3, equations), + equilibrium_distribution(20, rho, v1, v2, v3, equations), + equilibrium_distribution(21, rho, v1, v2, v3, equations), + equilibrium_distribution(22, rho, v1, v2, v3, equations), + equilibrium_distribution(23, rho, v1, v2, v3, equations), + equilibrium_distribution(24, rho, v1, v2, v3, equations), + equilibrium_distribution(25, rho, v1, v2, v3, equations), + equilibrium_distribution(26, rho, v1, v2, v3, equations), + equilibrium_distribution(27, rho, v1, v2, v3, equations)) end - function equilibrium_distribution(u, equations::LatticeBoltzmannEquations3D) - rho = density(u, equations) - v1, v2, v3 = velocity(u, equations) + rho = density(u, equations) + v1, v2, v3 = velocity(u, equations) - return equilibrium_distribution(rho, v1, v2, v3, equations) + return equilibrium_distribution(rho, v1, v2, v3, equations) end - """ collision_bgk(u, dt, equations::LatticeBoltzmannEquations3D) Collision operator for the Bhatnagar, Gross, and Krook (BGK) model. 
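# The BGK update below relaxes u toward its local equilibrium at rate
# 1 / (tau + 1/2), where the extra 1/2 is the correction commonly attributed to
# the implicit half time step of the trapezoidal discretization. A scalar
# standalone sketch (f, f_eq and tau are illustrative values, with f_eq held
# fixed):
let f = 1.0, f_eq = 0.25, tau = 0.8
    df = -(f - f_eq) / (tau + 1 / 2)
    @assert abs(f + df - f_eq) < abs(f - f_eq)  # strictly closer to equilibrium
end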
""" @inline function collision_bgk(u, dt, equations::LatticeBoltzmannEquations3D) - @unpack c_s, nu = equations - tau = nu / (c_s^2 * dt) - return -(u - equilibrium_distribution(u, equations))/(tau + 1/2) + @unpack c_s, nu = equations + tau = nu / (c_s^2 * dt) + return -(u - equilibrium_distribution(u, equations)) / (tau + 1 / 2) end - - @inline have_constant_speed(::LatticeBoltzmannEquations3D) = True() @inline function max_abs_speeds(equations::LatticeBoltzmannEquations3D) - @unpack c = equations + @unpack c = equations - return c, c, c + return c, c, c end - # Convert conservative variables to primitive @inline cons2prim(u, equations::LatticeBoltzmannEquations3D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equations::LatticeBoltzmannEquations3D) = u - # Calculate kinetic energy for a conservative state `u` @inline function energy_kinetic(u, equations::LatticeBoltzmannEquations3D) - rho = density(u, equations) - v1, v2, v3 = velocity(u, equations) + rho = density(u, equations) + v1, v2, v3 = velocity(u, equations) - return 0.5 * (v1^2 + v2^2 + v3^2) / rho / equations.rho0 + return 0.5 * (v1^2 + v2^2 + v3^2) / rho / equations.rho0 end # Calculate nondimensionalized kinetic energy for a conservative state `u` -@inline function energy_kinetic_nondimensional(u, equations::LatticeBoltzmannEquations3D) - return energy_kinetic(u, equations) / equations.u0^2 +@inline function energy_kinetic_nondimensional(u, + equations::LatticeBoltzmannEquations3D) + return energy_kinetic(u, equations) / equations.u0^2 end - - end # @muladd diff --git a/src/equations/linear_scalar_advection_1d.jl b/src/equations/linear_scalar_advection_1d.jl index a70c3b72b9b..7769cb61fbf 100644 --- a/src/equations/linear_scalar_advection_1d.jl +++ b/src/equations/linear_scalar_advection_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearScalarAdvectionEquation1D @@ -14,18 +14,17 @@ The linear scalar advection equation ``` in one space dimension with constant velocity `a`. """ -struct LinearScalarAdvectionEquation1D{RealT<:Real} <: AbstractLinearScalarAdvectionEquation{1, 1} - advection_velocity::SVector{1, RealT} +struct LinearScalarAdvectionEquation1D{RealT <: Real} <: + AbstractLinearScalarAdvectionEquation{1, 1} + advection_velocity::SVector{1, RealT} end function LinearScalarAdvectionEquation1D(a::Real) - LinearScalarAdvectionEquation1D(SVector(a)) + LinearScalarAdvectionEquation1D(SVector(a)) end - -varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation1D) = ("scalar", ) -varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation1D) = ("scalar", ) - +varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation1D) = ("scalar",) +varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation1D) = ("scalar",) # Set initial conditions at physical location `x` for time `t` """ @@ -34,13 +33,12 @@ varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation1D) = ("scalar", ) A constant initial condition to test free-stream preservation. 
""" function initial_condition_constant(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::LinearScalarAdvectionEquation1D) @@ -48,20 +46,20 @@ A smooth initial condition used for convergence tests (in combination with [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t - - c = 1.0 - A = 0.5 - L = 2 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * sum(x_trans)) - return SVector(scalar) +function initial_condition_convergence_test(x, t, + equation::LinearScalarAdvectionEquation1D) + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t + + c = 1.0 + A = 0.5 + L = 2 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * sum(x_trans)) + return SVector(scalar) end - """ initial_condition_gauss(x, t, equations::LinearScalarAdvectionEquation1D) @@ -69,28 +67,26 @@ A Gaussian pulse used together with [`BoundaryConditionDirichlet(initial_condition_gauss)`](@ref). """ function initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = exp(-(x_trans[1]^2)) - return SVector(scalar) + scalar = exp(-(x_trans[1]^2)) + return SVector(scalar) end - """ initial_condition_sin(x, t, equations::LinearScalarAdvectionEquation1D) A sine wave in the conserved variable. """ function initial_condition_sin(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = sinpi(2 * x_trans[1]) - return SVector(scalar) + scalar = sinpi(2 * x_trans[1]) + return SVector(scalar) end - """ initial_condition_linear_x(x, t, equations::LinearScalarAdvectionEquation1D) @@ -98,10 +94,10 @@ A linear function of `x[1]` used together with [`boundary_condition_linear_x`](@ref). 
""" function initial_condition_linear_x(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[1]) + return SVector(x_trans[1]) end """ @@ -115,84 +111,80 @@ Boundary conditions for function boundary_condition_linear_x(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation1D) - u_boundary = initial_condition_linear_x(x, t, equation) + u_boundary = initial_condition_linear_x(x, t, equation) - # Calculate boundary flux - if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LinearScalarAdvectionEquation1D) - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equation::LinearScalarAdvectionEquation1D) - a = equation.advection_velocity[orientation] - return a * u +@inline function flux(u, orientation::Integer, + equation::LinearScalarAdvectionEquation1D) + a = equation.advection_velocity[orientation] + return a * u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Int, equation::LinearScalarAdvectionEquation1D) - λ_max = abs(equation.advection_velocity[orientation]) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Int, + equation::LinearScalarAdvectionEquation1D) + λ_max = abs(equation.advection_velocity[orientation]) end - # Essentially first order upwind, see e.g. # https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, orientation::Int, equation::LinearScalarAdvectionEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] - - v_normal = equation.advection_velocity[orientation] - if v_normal >= 0 - return SVector(v_normal * u_L) - else - return SVector(v_normal * u_R) - end +function flux_godunov(u_ll, u_rr, orientation::Int, + equation::LinearScalarAdvectionEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] + + v_normal = equation.advection_velocity[orientation] + if v_normal >= 0 + return SVector(v_normal * u_L) + else + return SVector(v_normal * u_R) + end end - # See https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf , # section 4.2.5 and especially equation (4.33). 
-function flux_engquist_osher(u_ll, u_rr, orientation::Int, equation::LinearScalarAdvectionEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] - - return SVector(0.5 * (flux(u_L, orientation, equation) + flux(u_R, orientation, equation) - - abs(equation.advection_velocity[orientation]) * (u_R - u_L))) +function flux_engquist_osher(u_ll, u_rr, orientation::Int, + equation::LinearScalarAdvectionEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] + + return SVector(0.5 * (flux(u_L, orientation, equation) + + flux(u_R, orientation, equation) - + abs(equation.advection_velocity[orientation]) * (u_R - u_L))) end - @inline have_constant_speed(::LinearScalarAdvectionEquation1D) = True() @inline function max_abs_speeds(equation::LinearScalarAdvectionEquation1D) - return abs.(equation.advection_velocity) + return abs.(equation.advection_velocity) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::LinearScalarAdvectionEquation1D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::LinearScalarAdvectionEquation1D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::LinearScalarAdvectionEquation1D) = 0.5 * u^2 @inline entropy(u, equation::LinearScalarAdvectionEquation1D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::LinearScalarAdvectionEquation1D) = 0.5 * u^2 -@inline energy_total(u, equation::LinearScalarAdvectionEquation1D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::LinearScalarAdvectionEquation1D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/linear_scalar_advection_2d.jl b/src/equations/linear_scalar_advection_2d.jl index 9fe7d5f9b90..d90bf0c8793 100644 --- a/src/equations/linear_scalar_advection_2d.jl +++ b/src/equations/linear_scalar_advection_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearScalarAdvectionEquation2D @@ -14,28 +14,29 @@ The linear scalar advection equation ``` in two space dimensions with constant velocity `a`. 
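As a quick check of the upwind logic in `flux_godunov` and `flux_engquist_osher` above: for linear advection with a positive velocity, both fluxes reduce to the same left (upwind) value. A small sketch, assuming these names can be imported from Trixi and the scalar convenience constructor exists:

using Trixi: LinearScalarAdvectionEquation1D, flux_godunov, flux_engquist_osher
using StaticArrays: SVector

equation = LinearScalarAdvectionEquation1D(1.0)  # a > 0, so the left state is upwind
u_ll, u_rr = SVector(1.0), SVector(2.0)
flux_godunov(u_ll, u_rr, 1, equation)        # SVector(1.0), the left flux a * u_ll
# For linear advection the Engquist-Osher flux coincides with Godunov upwinding:
flux_engquist_osher(u_ll, u_rr, 1, equation) # SVector(1.0) as well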
""" -struct LinearScalarAdvectionEquation2D{RealT<:Real} <: AbstractLinearScalarAdvectionEquation{2, 1} - advection_velocity::SVector{2, RealT} +struct LinearScalarAdvectionEquation2D{RealT <: Real} <: + AbstractLinearScalarAdvectionEquation{2, 1} + advection_velocity::SVector{2, RealT} end -function LinearScalarAdvectionEquation2D(a::NTuple{2,<:Real}) - LinearScalarAdvectionEquation2D(SVector(a)) +function LinearScalarAdvectionEquation2D(a::NTuple{2, <:Real}) + LinearScalarAdvectionEquation2D(SVector(a)) end function LinearScalarAdvectionEquation2D(a1::Real, a2::Real) - LinearScalarAdvectionEquation2D(SVector(a1, a2)) + LinearScalarAdvectionEquation2D(SVector(a1, a2)) end - -varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation2D) = ("scalar", ) -varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation2D) = ("scalar", ) +varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation2D) = ("scalar",) +varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation2D) = ("scalar",) # Calculates translated coordinates `x` for a periodic domain function x_trans_periodic_2d(x, domain_length = SVector(10, 10), center = SVector(0, 0)) - x_normalized = x .- center - x_shifted = x_normalized .% domain_length - x_offset = ((x_shifted .< -0.5*domain_length) - (x_shifted .> 0.5*domain_length)) .* domain_length - return center + x_shifted + x_offset + x_normalized = x .- center + x_shifted = x_normalized .% domain_length + x_offset = ((x_shifted .< -0.5 * domain_length) - + (x_shifted .> 0.5 * domain_length)) .* domain_length + return center + x_shifted + x_offset end # Set initial conditions at physical location `x` for time `t` @@ -45,32 +46,31 @@ end A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) + # Store translated coordinate for easy use of exact solution + x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::LinearScalarAdvectionEquation2D) A smooth initial condition used for convergence tests. """ -function initial_condition_convergence_test(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t - - c = 1.0 - A = 0.5 - L = 2 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * sum(x_trans)) - return SVector(scalar) +function initial_condition_convergence_test(x, t, + equation::LinearScalarAdvectionEquation2D) + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t + + c = 1.0 + A = 0.5 + L = 2 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * sum(x_trans)) + return SVector(scalar) end - """ initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation2D) @@ -78,28 +78,26 @@ A Gaussian pulse used together with [`BoundaryConditionDirichlet(initial_condition_gauss)`](@ref). 
""" function initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) + # Store translated coordinate for easy use of exact solution + x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) - scalar = exp(-(x_trans[1]^2 + x_trans[2]^2)) - return SVector(scalar) + scalar = exp(-(x_trans[1]^2 + x_trans[2]^2)) + return SVector(scalar) end - """ initial_condition_sin_sin(x, t, equations::LinearScalarAdvectionEquation2D) A sine wave in the conserved variable. """ function initial_condition_sin_sin(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = sinpi(2 * x_trans[1]) * sinpi(2 * x_trans[2]) - return SVector(scalar) + scalar = sinpi(2 * x_trans[1]) * sinpi(2 * x_trans[2]) + return SVector(scalar) end - """ initial_condition_linear_x_y(x, t, equations::LinearScalarAdvectionEquation2D) @@ -107,10 +105,10 @@ A linear function of `x[1] + x[2]` used together with [`boundary_condition_linear_x_y`](@ref). """ function initial_condition_linear_x_y(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(sum(x_trans)) + return SVector(sum(x_trans)) end """ @@ -124,19 +122,18 @@ Boundary conditions for function boundary_condition_linear_x_y(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation2D) - u_boundary = initial_condition_linear_x_y(x, t, equation) + u_boundary = initial_condition_linear_x_y(x, t, equation) - # Calculate boundary flux - if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - """ initial_condition_linear_x(x, t, equations::LinearScalarAdvectionEquation2D) @@ -144,10 +141,10 @@ A linear function of `x[1]` used together with [`boundary_condition_linear_x`](@ref). 
""" function initial_condition_linear_x(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[1]) + return SVector(x_trans[1]) end """ @@ -161,19 +158,18 @@ Boundary conditions for function boundary_condition_linear_x(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation2D) - u_boundary = initial_condition_linear_x(x, t, equation) + u_boundary = initial_condition_linear_x(x, t, equation) - # Calculate boundary flux - if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - """ initial_condition_linear_y(x, t, equations::LinearScalarAdvectionEquation2D) @@ -181,10 +177,10 @@ A linear function of `x[1]` used together with [`boundary_condition_linear_y`](@ref). """ function initial_condition_linear_y(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[2]) + return SVector(x_trans[2]) end """ @@ -198,102 +194,98 @@ Boundary conditions for function boundary_condition_linear_y(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation2D) - u_boundary = initial_condition_linear_y(x, t, equation) + u_boundary = initial_condition_linear_y(x, t, equation) - # Calculate boundary flux - if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LinearScalarAdvectionEquation2D) - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equation::LinearScalarAdvectionEquation2D) - a = equation.advection_velocity[orientation] - return a * u +@inline function flux(u, orientation::Integer, + equation::LinearScalarAdvectionEquation2D) + a = equation.advection_velocity[orientation] + return a * u end - # Calculate maximum wave speed for local 
Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation2D) - λ_max = abs(equation.advection_velocity[orientation]) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation2D) + λ_max = abs(equation.advection_velocity[orientation]) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation2D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return a * u +@inline function flux(u, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation2D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return a * u end - # Calculate maximum wave speed in the normal direction for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation2D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return abs(a) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation2D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return abs(a) end - # Essentially first order upwind, see e.g. # https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation2D) - u_L = u_ll[1] - u_R = u_rr[1] - - v_normal = equation.advection_velocity[orientation] - if v_normal >= 0 - return SVector(v_normal * u_L) - else - return SVector(v_normal * u_R) - end +function flux_godunov(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation2D) + u_L = u_ll[1] + u_R = u_rr[1] + + v_normal = equation.advection_velocity[orientation] + if v_normal >= 0 + return SVector(v_normal * u_L) + else + return SVector(v_normal * u_R) + end end # Essentially first order upwind, see e.g. 
# https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation2D) - u_L = u_ll[1] - u_R = u_rr[1] - - a_normal = dot(equation.advection_velocity, normal_direction) - if a_normal >= 0 - return SVector(a_normal * u_L) - else - return SVector(a_normal * u_R) - end +function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation2D) + u_L = u_ll[1] + u_R = u_rr[1] + + a_normal = dot(equation.advection_velocity, normal_direction) + if a_normal >= 0 + return SVector(a_normal * u_L) + else + return SVector(a_normal * u_R) + end end - @inline have_constant_speed(::LinearScalarAdvectionEquation2D) = True() @inline function max_abs_speeds(equation::LinearScalarAdvectionEquation2D) - return abs.(equation.advection_velocity) + return abs.(equation.advection_velocity) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::LinearScalarAdvectionEquation2D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::LinearScalarAdvectionEquation2D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::LinearScalarAdvectionEquation2D) = 0.5 * u^2 @inline entropy(u, equation::LinearScalarAdvectionEquation2D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::LinearScalarAdvectionEquation2D) = 0.5 * u^2 -@inline energy_total(u, equation::LinearScalarAdvectionEquation2D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::LinearScalarAdvectionEquation2D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/linear_scalar_advection_3d.jl b/src/equations/linear_scalar_advection_3d.jl index 218eaf8816c..7b19974eb49 100644 --- a/src/equations/linear_scalar_advection_3d.jl +++ b/src/equations/linear_scalar_advection_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearScalarAdvectionEquation3D @@ -14,22 +14,21 @@ The linear scalar advection equation ``` in three space dimensions with constant velocity `a`. """ -struct LinearScalarAdvectionEquation3D{RealT<:Real} <: AbstractLinearScalarAdvectionEquation{3, 1} - advection_velocity::SVector{3, RealT} +struct LinearScalarAdvectionEquation3D{RealT <: Real} <: + AbstractLinearScalarAdvectionEquation{3, 1} + advection_velocity::SVector{3, RealT} end -function LinearScalarAdvectionEquation3D(a::NTuple{3,<:Real}) - LinearScalarAdvectionEquation3D(SVector(a)) +function LinearScalarAdvectionEquation3D(a::NTuple{3, <:Real}) + LinearScalarAdvectionEquation3D(SVector(a)) end function LinearScalarAdvectionEquation3D(a1::Real, a2::Real, a3::Real) - LinearScalarAdvectionEquation3D(SVector(a1, a2, a3)) + LinearScalarAdvectionEquation3D(SVector(a1, a2, a3)) end - -varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation3D) = ("scalar", ) -varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation3D) = ("scalar", ) - +varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation3D) = ("scalar",) +varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation3D) = ("scalar",) # Set initial conditions at physical location `x` for time `t` """ @@ -38,60 +37,58 @@ varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation3D) = ("scalar", ) A constant initial condition to test free-stream preservation. 
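Because the 2D flux in a normal direction above is just `dot(a, normal_direction) * u`, it is linear in the (unnormalized) normal vector and consistent with the Cartesian fluxes. A short sketch assuming the Trixi exports used earlier:

using Trixi: LinearScalarAdvectionEquation2D, flux
using StaticArrays: SVector

equation = LinearScalarAdvectionEquation2D(1.0, -0.5)
u = SVector(0.7)
normal = SVector(0.6, 0.8)  # need not be normalized
# dot(a, normal) * u equals the normal-weighted sum of the Cartesian fluxes:
flux(u, normal, equation) ≈
normal[1] * flux(u, 1, equation) + normal[2] * flux(u, 2, equation)  # true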
""" function initial_condition_constant(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::LinearScalarAdvectionEquation1D) A smooth initial condition used for convergence tests. """ -function initial_condition_convergence_test(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t +function initial_condition_convergence_test(x, t, + equation::LinearScalarAdvectionEquation3D) + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - c = 1.0 - A = 0.5 - L = 2 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * sum(x_trans)) - return SVector(scalar) + c = 1.0 + A = 0.5 + L = 2 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * sum(x_trans)) + return SVector(scalar) end - """ initial_condition_gauss(x, t, equations::LinearScalarAdvectionEquation1D) A Gaussian pulse. """ function initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = exp(-(x_trans[1]^2 + x_trans[2]^2 + x_trans[3]^2)) - return SVector(scalar) + scalar = exp(-(x_trans[1]^2 + x_trans[2]^2 + x_trans[3]^2)) + return SVector(scalar) end - """ initial_condition_sin(x, t, equations::LinearScalarAdvectionEquation1D) A sine wave in the conserved variable. """ function initial_condition_sin(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = sin(2 * pi * x_trans[1]) * sin(2 * pi * x_trans[2]) * sin(2 * pi * x_trans[3]) - return SVector(scalar) + scalar = sin(2 * pi * x_trans[1]) * sin(2 * pi * x_trans[2]) * + sin(2 * pi * x_trans[3]) + return SVector(scalar) end - """ initial_condition_linear_z(x, t, equations::LinearScalarAdvectionEquation1D) @@ -99,10 +96,10 @@ A linear function of `x[3]` used together with [`boundary_condition_linear_z`](@ref). 
""" function initial_condition_linear_z(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[3]) + return SVector(x_trans[3]) end """ @@ -116,102 +113,98 @@ Boundary conditions for function boundary_condition_linear_z(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation3D) - u_boundary = initial_condition_linear_z(x, t, equation) + u_boundary = initial_condition_linear_z(x, t, equation) - # Calculate boundary flux - if direction in (2, 4, 6) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4, 6) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equation::LinearScalarAdvectionEquation3D) - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equation::LinearScalarAdvectionEquation3D) - a = equation.advection_velocity[orientation] - return a * u +@inline function flux(u, orientation::Integer, + equation::LinearScalarAdvectionEquation3D) + a = equation.advection_velocity[orientation] + return a * u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation3D) - λ_max = abs(equation.advection_velocity[orientation]) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation3D) + λ_max = abs(equation.advection_velocity[orientation]) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation3D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return a * u +@inline function flux(u, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation3D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return a * u end - # Calculate maximum wave speed in the normal direction for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation3D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return abs(a) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation3D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return abs(a) end - # Essentially first order upwind, see e.g. 
# https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation3D) - u_L = u_ll[1] - u_R = u_rr[1] +function flux_godunov(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation3D) + u_L = u_ll[1] + u_R = u_rr[1] - v_normal = equation.advection_velocity[orientation] - if v_normal >= 0 - return SVector(v_normal * u_L) - else - return SVector(v_normal * u_R) - end + v_normal = equation.advection_velocity[orientation] + if v_normal >= 0 + return SVector(v_normal * u_L) + else + return SVector(v_normal * u_R) + end end # Essentially first order upwind, see e.g. # https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation3D) - u_L = u_ll[1] - u_R = u_rr[1] +function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation3D) + u_L = u_ll[1] + u_R = u_rr[1] - a_normal = dot(equation.advection_velocity, normal_direction) - if a_normal >= 0 - return SVector(a_normal * u_L) - else - return SVector(a_normal * u_R) - end + a_normal = dot(equation.advection_velocity, normal_direction) + if a_normal >= 0 + return SVector(a_normal * u_L) + else + return SVector(a_normal * u_R) + end end - @inline have_constant_speed(::LinearScalarAdvectionEquation3D) = True() @inline function max_abs_speeds(equation::LinearScalarAdvectionEquation3D) - return abs.(equation.advection_velocity) + return abs.(equation.advection_velocity) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::LinearScalarAdvectionEquation3D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::LinearScalarAdvectionEquation3D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::LinearScalarAdvectionEquation3D) = 0.5 * u^2 @inline entropy(u, equation::LinearScalarAdvectionEquation3D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::LinearScalarAdvectionEquation3D) = 0.5 * u^2 -@inline energy_total(u, equation::LinearScalarAdvectionEquation3D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::LinearScalarAdvectionEquation3D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/linearized_euler_2d.jl b/src/equations/linearized_euler_2d.jl index d1765fd0d7b..cd681365cae 100644 --- a/src/equations/linearized_euler_2d.jl +++ b/src/equations/linearized_euler_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearizedEulerEquations2D(v_mean_global, c_mean_global, rho_mean_global) @@ -32,29 +32,37 @@ Linearized Euler equations in two space dimensions. The equations are given by The bar ``\bar{(\cdot)}`` indicates uniform mean flow variables and c is the speed of sound. The unknowns are the acoustic velocities ``v' = (v_1', v_2')``, the pressure ``p'`` and the density ``\rho'``.
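The keyword constructor defined just below validates the mean state before building the equations. A usage sketch (the parameter values are illustrative only):

using Trixi: LinearizedEulerEquations2D

equations = LinearizedEulerEquations2D(v_mean_global = (0.5, 0.0),
                                       c_mean_global = 1.0,
                                       rho_mean_global = 1.0)
# A negative mean density or mean sound speed is rejected:
# LinearizedEulerEquations2D(v_mean_global = (0.5, 0.0), c_mean_global = -1.0,
#                            rho_mean_global = 1.0)  # throws ArgumentError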
""" -struct LinearizedEulerEquations2D{RealT<:Real} <: AbstractLinearizedEulerEquations{2, 4} +struct LinearizedEulerEquations2D{RealT <: Real} <: + AbstractLinearizedEulerEquations{2, 4} v_mean_global::SVector{2, RealT} c_mean_global::RealT rho_mean_global::RealT end -function LinearizedEulerEquations2D(v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, rho_mean_global::Real) +function LinearizedEulerEquations2D(v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) if rho_mean_global < 0 - throw(ArgumentError("rho_mean_global must be non-negative")) + throw(ArgumentError("rho_mean_global must be non-negative")) elseif c_mean_global < 0 - throw(ArgumentError("c_mean_global must be non-negative")) + throw(ArgumentError("c_mean_global must be non-negative")) end - return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) + return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end -function LinearizedEulerEquations2D(; v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, rho_mean_global::Real) - return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) +function LinearizedEulerEquations2D(; v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) + return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end - -varnames(::typeof(cons2cons), ::LinearizedEulerEquations2D) = ("rho_prime", "v1_prime", "v2_prime", "p_prime") -varnames(::typeof(cons2prim), ::LinearizedEulerEquations2D) = ("rho_prime", "v1_prime", "v2_prime", "p_prime") +function varnames(::typeof(cons2cons), ::LinearizedEulerEquations2D) + ("rho_prime", "v1_prime", "v2_prime", "p_prime") +end +function varnames(::typeof(cons2prim), ::LinearizedEulerEquations2D) + ("rho_prime", "v1_prime", "v2_prime", "p_prime") +end """ initial_condition_convergence_test(x, t, equations::LinearizedEulerEquations2D) @@ -62,23 +70,23 @@ varnames(::typeof(cons2prim), ::LinearizedEulerEquations2D) = ("rho_prime", "v1_ A smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::LinearizedEulerEquations2D) - rho_prime = -cospi(2*t) * (sinpi(2*x[1]) + sinpi(2*x[2])) - v1_prime = sinpi(2*t) * cospi(2*x[1]) - v2_prime = sinpi(2*t) * cospi(2*x[2]) + rho_prime = -cospi(2 * t) * (sinpi(2 * x[1]) + sinpi(2 * x[2])) + v1_prime = sinpi(2 * t) * cospi(2 * x[1]) + v2_prime = sinpi(2 * t) * cospi(2 * x[2]) p_prime = rho_prime return SVector(rho_prime, v1_prime, v2_prime, p_prime) end - """ boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::LinearizedEulerEquations2D) Boundary conditions for a solid wall. """ -function boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, - equations::LinearizedEulerEquations2D) +function boundary_condition_wall(u_inner, orientation, direction, x, t, + surface_flux_function, + equations::LinearizedEulerEquations2D) # Boundary state is equal to the inner state except for the velocity. For boundaries # in the -x/+x direction, we multiply the velocity in the x direction by -1. 
# Similarly, for boundaries in the -y/+y direction, we multiply the velocity in the @@ -99,7 +107,6 @@ function boundary_condition_wall(u_inner, orientation, direction, x, t, surface_ return flux end - # Calculate 1D flux for a single point @inline function flux(u, orientation::Integer, equations::LinearizedEulerEquations2D) @unpack v_mean_global, c_mean_global, rho_mean_global = equations @@ -119,7 +126,6 @@ end return SVector(f1, f2, f3, f4) end - @inline have_constant_speed(::LinearizedEulerEquations2D) = True() @inline function max_abs_speeds(equations::LinearizedEulerEquations2D) @@ -127,7 +133,8 @@ end return abs(v_mean_global[1]) + c_mean_global, abs(v_mean_global[2]) + c_mean_global end -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::LinearizedEulerEquations2D) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::LinearizedEulerEquations2D) @unpack v_mean_global, c_mean_global = equations if orientation == 1 return abs(v_mean_global[1]) + c_mean_global @@ -136,10 +143,7 @@ end end end - # Convert conservative variables to primitive @inline cons2prim(u, equations::LinearizedEulerEquations2D) = u @inline cons2entropy(u, ::LinearizedEulerEquations2D) = u - - end # muladd diff --git a/src/equations/numerical_fluxes.jl b/src/equations/numerical_fluxes.jl index ff9596848bb..16a83124d14 100644 --- a/src/equations/numerical_fluxes.jl +++ b/src/equations/numerical_fluxes.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This file contains general numerical fluxes that are not specific to certain equations @@ -16,34 +16,37 @@ DG method (except floating point errors). """ @inline function flux_central(u_ll, u_rr, orientation_or_normal_direction, equations::AbstractEquations) - # Calculate regular 1D fluxes - f_ll = flux(u_ll, orientation_or_normal_direction, equations) - f_rr = flux(u_rr, orientation_or_normal_direction, equations) + # Calculate regular 1D fluxes + f_ll = flux(u_ll, orientation_or_normal_direction, equations) + f_rr = flux(u_rr, orientation_or_normal_direction, equations) - # Average regular fluxes - return 0.5 * (f_ll + f_rr) + # Average regular fluxes + return 0.5 * (f_ll + f_rr) end - """ FluxPlusDissipation(numerical_flux, dissipation) Combine a `numerical_flux` with a `dissipation` operator to create a new numerical flux. 
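`FluxPlusDissipation` simply adds the dissipation operator to the baseline flux; composing `flux_central` with local Lax-Friedrichs dissipation recovers `flux_lax_friedrichs`, which is defined further below as exactly this combination. A sketch, assuming the exported Trixi names:

using Trixi: FluxPlusDissipation, DissipationLocalLaxFriedrichs, flux_central,
             flux_lax_friedrichs, LinearScalarAdvectionEquation1D
using StaticArrays: SVector

my_flux = FluxPlusDissipation(flux_central, DissipationLocalLaxFriedrichs())
equation = LinearScalarAdvectionEquation1D(1.0)
u_ll, u_rr = SVector(1.0), SVector(2.0)
# Central average 1.5 plus dissipation -0.5 gives the upwind value 1.0:
my_flux(u_ll, u_rr, 1, equation) ≈ flux_lax_friedrichs(u_ll, u_rr, 1, equation)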
""" struct FluxPlusDissipation{NumericalFlux, Dissipation} - numerical_flux::NumericalFlux - dissipation::Dissipation + numerical_flux::NumericalFlux + dissipation::Dissipation end -@inline function (numflux::FluxPlusDissipation)(u_ll, u_rr, orientation_or_normal_direction, equations) - @unpack numerical_flux, dissipation = numflux +@inline function (numflux::FluxPlusDissipation)(u_ll, u_rr, + orientation_or_normal_direction, + equations) + @unpack numerical_flux, dissipation = numflux - return ( numerical_flux(u_ll, u_rr, orientation_or_normal_direction, equations) - + dissipation(u_ll, u_rr, orientation_or_normal_direction, equations) ) + return (numerical_flux(u_ll, u_rr, orientation_or_normal_direction, equations) + + + dissipation(u_ll, u_rr, orientation_or_normal_direction, equations)) end -Base.show(io::IO, f::FluxPlusDissipation) = print(io, "FluxPlusDissipation(", f.numerical_flux, ", ", f.dissipation, ")") - +function Base.show(io::IO, f::FluxPlusDissipation) + print(io, "FluxPlusDissipation(", f.numerical_flux, ", ", f.dissipation, ")") +end """ FluxRotated(numerical_flux) @@ -55,57 +58,56 @@ Requires a rotationally invariant equation with equation-specific functions [`rotate_to_x`](@ref) and [`rotate_from_x`](@ref). """ struct FluxRotated{NumericalFlux} - numerical_flux::NumericalFlux + numerical_flux::NumericalFlux end - # Rotated surface flux computation (2D version) -@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, normal_direction::AbstractVector, +@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, + normal_direction::AbstractVector, equations::AbstractEquations{2}) - @unpack numerical_flux = flux_rotated + @unpack numerical_flux = flux_rotated - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal_vector = normal_direction / norm_ + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal_vector = normal_direction / norm_ - u_ll_rotated = rotate_to_x(u_ll, normal_vector, equations) - u_rr_rotated = rotate_to_x(u_rr, normal_vector, equations) + u_ll_rotated = rotate_to_x(u_ll, normal_vector, equations) + u_rr_rotated = rotate_to_x(u_rr, normal_vector, equations) - f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) + f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) - return rotate_from_x(f, normal_vector, equations) * norm_ + return rotate_from_x(f, normal_vector, equations) * norm_ end - # Rotated surface flux computation (3D version) -@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, normal_direction::AbstractVector, +@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, + normal_direction::AbstractVector, equations::AbstractEquations{3}) - @unpack numerical_flux = flux_rotated + @unpack numerical_flux = flux_rotated - # Storing these vectors could increase the performance by 20 percent - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal_vector = normal_direction / norm_ + # Storing these vectors could increase the performance by 20 percent + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal_vector = normal_direction / norm_ - # Some vector that can't be identical to normal_vector (unless normal_vector == 0) - tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) 
- # Orthogonal projection - tangent1 -= dot(normal_vector, tangent1) * normal_vector - tangent1 = normalize(tangent1) + # Some vector that can't be identical to normal_vector (unless normal_vector == 0) + tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) + # Orthogonal projection + tangent1 -= dot(normal_vector, tangent1) * normal_vector + tangent1 = normalize(tangent1) - # Third orthogonal vector - tangent2 = normalize(cross(normal_direction, tangent1)) + # Third orthogonal vector + tangent2 = normalize(cross(normal_direction, tangent1)) - u_ll_rotated = rotate_to_x(u_ll, normal_vector, tangent1, tangent2, equations) - u_rr_rotated = rotate_to_x(u_rr, normal_vector, tangent1, tangent2, equations) + u_ll_rotated = rotate_to_x(u_ll, normal_vector, tangent1, tangent2, equations) + u_rr_rotated = rotate_to_x(u_rr, normal_vector, tangent1, tangent2, equations) - f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) + f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) - return rotate_from_x(f, normal_vector, tangent1, tangent2, equations) * norm_ + return rotate_from_x(f, normal_vector, tangent1, tangent2, equations) * norm_ end -Base.show(io::IO, f::FluxRotated) = print(io, "FluxRotated(", f.numerical_flux, ")") - +Base.show(io::IO, f::FluxRotated) = print(io, "FluxRotated(", f.numerical_flux, ")") """ DissipationGlobalLaxFriedrichs(λ) @@ -113,21 +115,26 @@ Base.show(io::IO, f::FluxRotated) = print(io, "FluxRotated(", f.numerical_flux, Create a global Lax-Friedrichs dissipation operator with dissipation coefficient `λ`. """ struct DissipationGlobalLaxFriedrichs{RealT} - λ::RealT + λ::RealT end -@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, orientation::Integer, equations) - @unpack λ = dissipation - return -λ/2 * (u_rr - u_ll) +@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, + orientation::Integer, + equations) + @unpack λ = dissipation + return -λ / 2 * (u_rr - u_ll) end -@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, normal_direction::AbstractVector, equations) - @unpack λ = dissipation - return -λ/2 * norm(normal_direction) * (u_rr - u_ll) +@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, + normal_direction::AbstractVector, + equations) + @unpack λ = dissipation + return -λ / 2 * norm(normal_direction) * (u_rr - u_ll) end -Base.show(io::IO, d::DissipationGlobalLaxFriedrichs) = print(io, "DissipationGlobalLaxFriedrichs(", d.λ, ")") - +function Base.show(io::IO, d::DissipationGlobalLaxFriedrichs) + print(io, "DissipationGlobalLaxFriedrichs(", d.λ, ")") +end """ DissipationLocalLaxFriedrichs(max_abs_speed=max_abs_speed_naive) @@ -138,18 +145,22 @@ is estimated as defaulting to [`max_abs_speed_naive`](@ref). 
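The difference between the global and the local Lax-Friedrichs dissipation is only where λ comes from: a fixed user-supplied bound versus an estimate recomputed from the interface states. A numeric sketch with the 1D advection equation (values illustrative, names assumed to be importable from Trixi):

using Trixi: FluxPlusDissipation, DissipationGlobalLaxFriedrichs,
             DissipationLocalLaxFriedrichs, flux_central,
             LinearScalarAdvectionEquation1D
using StaticArrays: SVector

equation = LinearScalarAdvectionEquation1D(1.0)
u_ll, u_rr = SVector(1.0), SVector(2.0)
# Global: λ = 2 is fixed, so the flux is 1.5 - (2/2) * (2 - 1) = 0.5
flux_global = FluxPlusDissipation(flux_central, DissipationGlobalLaxFriedrichs(2.0))
flux_global(u_ll, u_rr, 1, equation)  # SVector(0.5)
# Local: λ = |a| = 1 is estimated per interface, recovering the upwind value
flux_local = FluxPlusDissipation(flux_central, DissipationLocalLaxFriedrichs())
flux_local(u_ll, u_rr, 1, equation)   # SVector(1.0)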
""" struct DissipationLocalLaxFriedrichs{MaxAbsSpeed} - max_abs_speed::MaxAbsSpeed + max_abs_speed::MaxAbsSpeed end DissipationLocalLaxFriedrichs() = DissipationLocalLaxFriedrichs(max_abs_speed_naive) -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, equations) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - return -0.5 * λ * (u_rr - u_ll) +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, + equations) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + return -0.5 * λ * (u_rr - u_ll) end -Base.show(io::IO, d::DissipationLocalLaxFriedrichs) = print(io, "DissipationLocalLaxFriedrichs(", d.max_abs_speed, ")") - +function Base.show(io::IO, d::DissipationLocalLaxFriedrichs) + print(io, "DissipationLocalLaxFriedrichs(", d.max_abs_speed, ")") +end """ max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations) @@ -164,11 +175,16 @@ For non-integer arguments `normal_direction` in one dimension, `max_abs_speed_na function max_abs_speed_naive end # for non-integer `orientation_or_normal` arguments. -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::AbstractEquations{1}) - return abs(normal_direction[1]) * max_abs_speed_naive(u_ll, u_rr, 1, equations) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::AbstractEquations{1}) + return abs(normal_direction[1]) * max_abs_speed_naive(u_ll, u_rr, 1, equations) end -const FluxLaxFriedrichs{MaxAbsSpeed} = FluxPlusDissipation{typeof(flux_central), DissipationLocalLaxFriedrichs{MaxAbsSpeed}} +const FluxLaxFriedrichs{MaxAbsSpeed} = FluxPlusDissipation{typeof(flux_central), + DissipationLocalLaxFriedrichs{ + MaxAbsSpeed + } + } """ FluxLaxFriedrichs(max_abs_speed=max_abs_speed_naive) @@ -176,11 +192,13 @@ Local Lax-Friedrichs (Rusanov) flux with maximum wave speed estimate provided by `max_abs_speed`, cf. [`DissipationLocalLaxFriedrichs`](@ref) and [`max_abs_speed_naive`](@ref). """ -function FluxLaxFriedrichs(max_abs_speed=max_abs_speed_naive) - FluxPlusDissipation(flux_central, DissipationLocalLaxFriedrichs(max_abs_speed)) +function FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive) + FluxPlusDissipation(flux_central, DissipationLocalLaxFriedrichs(max_abs_speed)) end -Base.show(io::IO, f::FluxLaxFriedrichs) = print(io, "FluxLaxFriedrichs(", f.dissipation.max_abs_speed, ")") +function Base.show(io::IO, f::FluxLaxFriedrichs) + print(io, "FluxLaxFriedrichs(", f.dissipation.max_abs_speed, ")") +end """ flux_lax_friedrichs @@ -189,7 +207,6 @@ See [`FluxLaxFriedrichs`](@ref). """ const flux_lax_friedrichs = FluxLaxFriedrichs() - """ FluxHLL(min_max_speed=min_max_speed_naive) @@ -199,7 +216,7 @@ wave speeds are estimated as defaulting to [`min_max_speed_naive`](@ref). 
""" struct FluxHLL{MinMaxSpeed} - min_max_speed::MinMaxSpeed + min_max_speed::MinMaxSpeed end FluxHLL() = FluxHLL(min_max_speed_naive) @@ -217,22 +234,24 @@ left and right states `u_ll, u_rr`, usually based only on the local wave speeds """ function min_max_speed_naive end -@inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, equations) - λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - - if λ_min >= 0 && λ_max >= 0 - return flux(u_ll, orientation_or_normal_direction, equations) - elseif λ_max <= 0 && λ_min <= 0 - return flux(u_rr, orientation_or_normal_direction, equations) - else - f_ll = flux(u_ll, orientation_or_normal_direction, equations) - f_rr = flux(u_rr, orientation_or_normal_direction, equations) - inv_λ_max_minus_λ_min = inv(λ_max - λ_min) - factor_ll = λ_max * inv_λ_max_minus_λ_min - factor_rr = λ_min * inv_λ_max_minus_λ_min - factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min - return factor_ll * f_ll - factor_rr * f_rr + factor_diss * (u_rr - u_ll) - end +@inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, + equations) + λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + + if λ_min >= 0 && λ_max >= 0 + return flux(u_ll, orientation_or_normal_direction, equations) + elseif λ_max <= 0 && λ_min <= 0 + return flux(u_rr, orientation_or_normal_direction, equations) + else + f_ll = flux(u_ll, orientation_or_normal_direction, equations) + f_rr = flux(u_rr, orientation_or_normal_direction, equations) + inv_λ_max_minus_λ_min = inv(λ_max - λ_min) + factor_ll = λ_max * inv_λ_max_minus_λ_min + factor_rr = λ_min * inv_λ_max_minus_λ_min + factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min + return factor_ll * f_ll - factor_rr * f_rr + factor_diss * (u_rr - u_ll) + end end Base.show(io::IO, numflux::FluxHLL) = print(io, "FluxHLL(", numflux.min_max_speed, ")") @@ -244,8 +263,6 @@ See [`FluxHLL`](@ref). """ const flux_hll = FluxHLL() - - """ flux_shima_etal_turbo(u_ll, u_rr, orientation_or_normal_direction, equations) @@ -254,8 +271,9 @@ methods, e.g., when used with [`VolumeIntegralFluxDifferencing`](@ref). These specialized methods may enable better use of SIMD instructions to increase runtime efficiency on modern hardware. """ -@inline function flux_shima_etal_turbo(u_ll, u_rr, orientation_or_normal_direction, equations) - flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations) +@inline function flux_shima_etal_turbo(u_ll, u_rr, orientation_or_normal_direction, + equations) + flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations) end """ @@ -266,11 +284,11 @@ methods, e.g., when used with [`VolumeIntegralFluxDifferencing`](@ref). These specialized methods may enable better use of SIMD instructions to increase runtime efficiency on modern hardware. """ -@inline function flux_ranocha_turbo(u_ll, u_rr, orientation_or_normal_direction, equations) - flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations) +@inline function flux_ranocha_turbo(u_ll, u_rr, orientation_or_normal_direction, + equations) + flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations) end - """ FluxHydrostaticReconstruction(numerical_flux, hydrostatic_reconstruction) @@ -299,23 +317,23 @@ fronts. 
A good overview of the development and application of hydrostatic recons [DOI: 10.1016/j.advwatres.2019.03.010](https://doi.org/10.1016/j.advwatres.2019.03.010) """ struct FluxHydrostaticReconstruction{NumericalFlux, HydrostaticReconstruction} - numerical_flux::NumericalFlux - hydrostatic_reconstruction::HydrostaticReconstruction + numerical_flux::NumericalFlux + hydrostatic_reconstruction::HydrostaticReconstruction end @inline function (numflux::FluxHydrostaticReconstruction)(u_ll, u_rr, orientation_or_normal_direction, equations::AbstractEquations) - @unpack numerical_flux, hydrostatic_reconstruction = numflux + @unpack numerical_flux, hydrostatic_reconstruction = numflux - # Create the reconstructed left/right solution states in conservative form - u_ll_star, u_rr_star = hydrostatic_reconstruction(u_ll, u_rr, equations) + # Create the reconstructed left/right solution states in conservative form + u_ll_star, u_rr_star = hydrostatic_reconstruction(u_ll, u_rr, equations) - # Use the reconstructed states to compute the numerical surface flux - return numerical_flux(u_ll_star, u_rr_star, orientation_or_normal_direction, equations) + # Use the reconstructed states to compute the numerical surface flux + return numerical_flux(u_ll_star, u_rr_star, orientation_or_normal_direction, + equations) end - """ FluxUpwind(splitting) @@ -330,17 +348,15 @@ as numerical flux (up to floating point differences). This is an experimental feature and may change in future releases. """ struct FluxUpwind{Splitting} - splitting::Splitting + splitting::Splitting end @inline function (numflux::FluxUpwind)(u_ll, u_rr, orientation::Int, equations) - @unpack splitting = numflux - fm = splitting(u_rr, Val{:minus}(), orientation, equations) - fp = splitting(u_ll, Val{:plus}(), orientation, equations) - return fm + fp + @unpack splitting = numflux + fm = splitting(u_rr, Val{:minus}(), orientation, equations) + fp = splitting(u_ll, Val{:plus}(), orientation, equations) + return fm + fp end -Base.show(io::IO, f::FluxUpwind) = print(io, "FluxUpwind(", f.splitting, ")") - - +Base.show(io::IO, f::FluxUpwind) = print(io, "FluxUpwind(", f.splitting, ")") end # @muladd diff --git a/src/equations/shallow_water_1d.jl b/src/equations/shallow_water_1d.jl index 949c6576006..851cbacdd57 100644 --- a/src/equations/shallow_water_1d.jl +++ b/src/equations/shallow_water_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" ShallowWaterEquations1D(gravity, H0) @@ -44,27 +44,25 @@ References for the SWE are many but a good introduction is available in Chapter Finite Volume Methods for Hyperbolic Problems [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253) """ -struct ShallowWaterEquations1D{RealT<:Real} <: AbstractShallowWaterEquations{1, 3} - gravity::RealT # gravitational constant - H0::RealT # constant "lake-at-rest" total water height +struct ShallowWaterEquations1D{RealT <: Real} <: AbstractShallowWaterEquations{1, 3} + gravity::RealT # gravitational constant + H0::RealT # constant "lake-at-rest" total water height end # Allow for flexibility to set the gravitational constant within an elixir depending on the # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values. 
# The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest" # well-balancedness test cases -function ShallowWaterEquations1D(; gravity_constant, H0=0.0) - ShallowWaterEquations1D(gravity_constant, H0) +function ShallowWaterEquations1D(; gravity_constant, H0 = 0.0) + ShallowWaterEquations1D(gravity_constant, H0) end - have_nonconservative_terms(::ShallowWaterEquations1D) = True() varnames(::typeof(cons2cons), ::ShallowWaterEquations1D) = ("h", "h_v", "b") # Note, we use the total water height, H = h + b, as the first primitive variable for easier # visualization and setting initial conditions varnames(::typeof(cons2prim), ::ShallowWaterEquations1D) = ("H", "v", "b") - # Set initial conditions at physical location `x` for time `t` """ initial_condition_convergence_test(x, t, equations::ShallowWaterEquations1D) @@ -75,15 +73,15 @@ A smooth initial condition used for convergence tests in combination with """ function initial_condition_convergence_test(x, t, equations::ShallowWaterEquations1D) - # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)] - c = 7.0 - omega_x = 2.0 * pi * sqrt(2.0) - omega_t = 2.0 * pi - - H = c + cos(omega_x * x[1]) * cos(omega_t * t) - v = 0.5 - b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) - return prim2cons(SVector(H, v, b), equations) + # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)] + c = 7.0 + omega_x = 2.0 * pi * sqrt(2.0) + omega_t = 2.0 * pi + + H = c + cos(omega_x * x[1]) * cos(omega_t * t) + v = 0.5 + b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) + return prim2cons(SVector(H, v, b), equations) end """ @@ -98,31 +96,32 @@ This manufactured solution source term is specifically designed for the bottom t as defined in [`initial_condition_convergence_test`](@ref). """ -@inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterEquations1D) - # Same settings as in `initial_condition_convergence_test`. Some derivative simplify because - # this manufactured solution velocity is taken to be constant - c = 7.0 - omega_x = 2.0 * pi * sqrt(2.0) - omega_t = 2.0 * pi - omega_b = sqrt(2.0) * pi - v = 0.5 - - sinX, cosX = sincos(omega_x * x[1]) - sinT, cosT = sincos(omega_t * t ) - - H = c + cosX * cosT - H_x = -omega_x * sinX * cosT - # this time derivative for the water height exploits that the bottom topography is - # fixed in time such that H_t = (h+b)_t = h_t + 0 - H_t = -omega_t * cosX * sinT - - # bottom topography and its spatial derivative - b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) - b_x = 0.5 * omega_b * cos(omega_b * x[1]) - - du1 = H_t + v * (H_x - b_x) - du2 = v * du1 + equations.gravity * (H - b) * H_x - return SVector(du1, du2, 0.0) +@inline function source_terms_convergence_test(u, x, t, + equations::ShallowWaterEquations1D) + # Same settings as in `initial_condition_convergence_test`. 
Some derivatives simplify because + # this manufactured solution velocity is taken to be constant + c = 7.0 + omega_x = 2.0 * pi * sqrt(2.0) + omega_t = 2.0 * pi + omega_b = sqrt(2.0) * pi + v = 0.5 + + sinX, cosX = sincos(omega_x * x[1]) + sinT, cosT = sincos(omega_t * t) + + H = c + cosX * cosT + H_x = -omega_x * sinX * cosT + # this time derivative for the water height exploits that the bottom topography is + # fixed in time such that H_t = (h+b)_t = h_t + 0 + H_t = -omega_t * cosX * sinT + + # bottom topography and its spatial derivative + b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) + b_x = 0.5 * omega_b * cos(omega_b * x[1]) + + du1 = H_t + v * (H_x - b_x) + du2 = v * du1 + equations.gravity * (H - b) * H_x + return SVector(du1, du2, 0.0) end """ @@ -132,17 +131,16 @@ A weak blast wave discontinuity useful for testing, e.g., total energy conservat Note that for the shallow water equations the total energy acts as a mathematical entropy function. """ function initial_condition_weak_blast_wave(x, t, equations::ShallowWaterEquations1D) + inicenter = 0.7 + x_norm = x[1] - inicenter + r = abs(x_norm) - inicenter = 0.7 - x_norm = x[1] - inicenter - r = abs(x_norm) + # Calculate primitive variables + H = r > 0.5 ? 3.25 : 4.0 + v = r > 0.5 ? 0.0 : 0.1882 + b = sin(x[1]) # arbitrary continuous function - # Calculate primitive variables - H = r > 0.5 ? 3.25 : 4.0 - v = r > 0.5 ? 0.0 : 0.1882 - b = sin(x[1]) # arbitrary continuous function - - return prim2cons(SVector(H, v, b), equations) + return prim2cons(SVector(H, v, b), equations) end """ @@ -164,33 +162,35 @@ For details see Section 9.2.5 of the book: surface_flux_function, equations::ShallowWaterEquations1D) - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - -u_inner[2], - u_inner[3]) - - # calculate the boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, equations) - end - - return flux + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + -u_inner[2], + u_inner[3]) + + # calculate the boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, + equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, + equations) + end + + return flux end # Calculate 1D flux for a single point # Note, the bottom topography has no flux @inline function flux(u, orientation::Integer, equations::ShallowWaterEquations1D) - h, h_v, _ = u - v = velocity(u, equations) + h, h_v, _ = u + v = velocity(u, equations) - p = 0.5 * equations.gravity * h^2 + p = 0.5 * equations.gravity * h^2 - f1 = h_v - f2 = h_v * v + p + f1 = h_v + f2 = h_v * v + p - return SVector(f1, f2, zero(eltype(u))) + return SVector(f1, f2, zero(eltype(u))) end """ @@ -208,16 +208,16 @@ Further details are available in the paper: """ @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Pull the necessary left and right state information - h_ll = 
waterheight(u_ll, equations) + b_rr = u_rr[3] - z = zero(eltype(u_ll)) + z = zero(eltype(u_ll)) - # Bottom gradient nonconservative term: (0, g h b_x, 0) - f = SVector(z, equations.gravity * h_ll * b_rr, z) + # Bottom gradient nonconservative term: (0, g h b_x, 0) + f = SVector(z, equations.gravity * h_ll * b_rr, z) - return f + return f end """ @@ -245,27 +245,27 @@ and for curvilinear 2D case in the paper: """ @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Pull the necessary left and right state information - h_ll, _, b_ll = u_ll - h_rr, _, b_rr = u_rr + # Pull the necessary left and right state information + h_ll, _, b_ll = u_ll + h_rr, _, b_rr = u_rr - h_average = 0.5 * (h_ll + h_rr) - b_jump = b_rr - b_ll + h_average = 0.5 * (h_ll + h_rr) + b_jump = b_rr - b_ll - # Includes two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry - z = zero(eltype(u_ll)) + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry + z = zero(eltype(u_ll)) - f = SVector(z, - equations.gravity * h_ll * b_ll + equations.gravity * h_average * b_jump, - z) + f = SVector(z, + equations.gravity * h_ll * b_ll + + equations.gravity * h_average * b_jump, + z) - return f + return f end - """ flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) @@ -287,26 +287,26 @@ Further details on the hydrostatic reconstruction and its motivation can be foun @inline function flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Pull the water height and bottom topography on the left - h_ll, _, b_ll = u_ll - - # Create the hydrostatic reconstruction for the left solution state - u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) - - # Copy the reconstructed water height for easier to read code - h_ll_star = u_ll_star[1] - - z = zero(eltype(u_ll)) - # Includes two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry - return SVector(z, - equations.gravity * h_ll * b_ll + equations.gravity * ( h_ll^2 - h_ll_star^2 ), - z) + # Pull the water height and bottom topography on the left + h_ll, _, b_ll = u_ll + + # Create the hydrostatic reconstruction for the left solution state + u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) + + # Copy the reconstructed water height for easier to read code + h_ll_star = u_ll_star[1] + + z = zero(eltype(u_ll)) + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry + return SVector(z, + equations.gravity * h_ll * b_ll + + equations.gravity * (h_ll^2 - h_ll_star^2), + z) end - """ flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterEquations1D) @@ -320,23 +320,24 @@ 
Details are available in Eq. (4.1) in the paper:
  Well-balanced and energy stable schemes for the shallow water equations with discontinuous topography
  [DOI: 10.1016/j.jcp.2011.03.042](https://doi.org/10.1016/j.jcp.2011.03.042)
"""
-@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D)
-  # Unpack left and right state
-  h_ll = waterheight(u_ll, equations)
-  v_ll = velocity(u_ll, equations)
-  h_rr = waterheight(u_rr, equations)
-  v_rr = velocity(u_rr, equations)
-
-  # Average each factor of products in flux
-  h_avg = 0.5 * (h_ll + h_rr )
-  v_avg = 0.5 * (v_ll + v_rr )
-  p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2)
-
-  # Calculate fluxes depending on orientation
-  f1 = h_avg * v_avg
-  f2 = f1 * v_avg + p_avg
-
-  return SVector(f1, f2, zero(eltype(u_ll)))
+@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer,
+                                     equations::ShallowWaterEquations1D)
+    # Unpack left and right state
+    h_ll = waterheight(u_ll, equations)
+    v_ll = velocity(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    v_rr = velocity(u_rr, equations)
+
+    # Average each factor of products in flux
+    h_avg = 0.5 * (h_ll + h_rr)
+    v_avg = 0.5 * (v_ll + v_rr)
+    p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2)
+
+    # Calculate fluxes depending on orientation
+    f1 = h_avg * v_avg
+    f2 = f1 * v_avg + p_avg
+
+    return SVector(f1, f2, zero(eltype(u_ll)))
 end

 """
@@ -353,27 +354,27 @@ Further details are available in Theorem 1 of the paper:
  shallow water equations on unstructured curvilinear meshes with discontinuous bathymetry
  [DOI: 10.1016/j.jcp.2017.03.036](https://doi.org/10.1016/j.jcp.2017.03.036)
"""
-@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D)
-  # Unpack left and right state
-  h_ll, h_v_ll, _ = u_ll
-  h_rr, h_v_rr, _ = u_rr
+@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer,
+                                       equations::ShallowWaterEquations1D)
+    # Unpack left and right state
+    h_ll, h_v_ll, _ = u_ll
+    h_rr, h_v_rr, _ = u_rr

-  # Get the velocities on either side
-  v_ll = velocity(u_ll, equations)
-  v_rr = velocity(u_rr, equations)
+    # Get the velocities on either side
+    v_ll = velocity(u_ll, equations)
+    v_rr = velocity(u_rr, equations)

-  # Average each factor of products in flux
-  v_avg = 0.5 * (v_ll + v_rr)
-  p_avg = 0.5 * equations.gravity * h_ll * h_rr
+    # Average each factor of products in flux
+    v_avg = 0.5 * (v_ll + v_rr)
+    p_avg = 0.5 * equations.gravity * h_ll * h_rr

-  # Calculate fluxes depending on orientation
-  f1 = 0.5 * (h_v_ll + h_v_rr)
-  f2 = f1 * v_avg + p_avg
+    # Calculate fluxes depending on orientation
+    f1 = 0.5 * (h_v_ll + h_v_rr)
+    f2 = f1 * v_avg + p_avg

-  return SVector(f1, f2, zero(eltype(u_ll)))
+    return SVector(f1, f2, zero(eltype(u_ll)))
 end
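As a quick sanity check outside of this patch (a sketch, not part of the diff; it assumes a Trixi.jl session and the `gravity_constant` keyword constructor used elsewhere in this file), a symmetric two-point flux such as `flux_wintermeyer_etal` must be consistent, i.e., collapse to the physical flux when both states coincide:

# Consistency sketch: flux_wintermeyer_etal(u, u, ...) should equal flux(u, ...)
using Trixi
equations = ShallowWaterEquations1D(gravity_constant = 9.81)
u = prim2cons(SVector(2.0, 0.5, 0.0), equations)  # primitive state (H, v, b)
@assert flux_wintermeyer_etal(u, u, 1, equations) ≈ flux(u, 1, equations)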
-
 """
     hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D)
@@ -388,203 +389,192 @@ Further details on this hydrostatic reconstruction and its motivation can be fou
  A fast and stable well-balanced scheme with hydrostatic reconstruction for shallow water flows
  [DOI: 10.1137/S1064827503431090](https://doi.org/10.1137/S1064827503431090)
"""
-@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations::ShallowWaterEquations1D)
-  # Unpack left and right water heights and bottom topographies
-  h_ll, _, b_ll = u_ll
-  h_rr, _, b_rr = u_rr
+@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr,
+                                                         equations::ShallowWaterEquations1D)
+    # Unpack left and right water heights and bottom topographies
+    h_ll, _, b_ll = u_ll
+    h_rr, _, b_rr = u_rr

-  # Get the velocities on either side
-  v1_ll = velocity(u_ll, equations)
-  v1_rr = velocity(u_rr, equations)
+    # Get the velocities on either side
+    v1_ll = velocity(u_ll, equations)
+    v1_rr = velocity(u_rr, equations)

-  # Compute the reconstructed water heights
-  h_ll_star = max(zero(h_ll) , h_ll + b_ll - max(b_ll, b_rr) )
-  h_rr_star = max(zero(h_rr) , h_rr + b_rr - max(b_ll, b_rr) )
+    # Compute the reconstructed water heights
+    h_ll_star = max(zero(h_ll), h_ll + b_ll - max(b_ll, b_rr))
+    h_rr_star = max(zero(h_rr), h_rr + b_rr - max(b_ll, b_rr))

-  # Create the conservative variables using the reconstruted water heights
-  u_ll_star = SVector( h_ll_star , h_ll_star * v1_ll , b_ll )
-  u_rr_star = SVector( h_rr_star , h_rr_star * v1_rr , b_rr )
+    # Create the conservative variables using the reconstructed water heights
+    u_ll_star = SVector(h_ll_star, h_ll_star * v1_ll, b_ll)
+    u_rr_star = SVector(h_rr_star, h_rr_star * v1_rr, b_rr)

-  return u_ll_star, u_rr_star
+    return u_ll_star, u_rr_star
 end
-
 # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the
 # maximum velocity magnitude plus the maximum speed of sound
-@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D)
-  # Get the velocity quantities
-  v_ll = velocity(u_ll, equations)
-  v_rr = velocity(u_rr, equations)
-
-  # Calculate the wave celerity on the left and right
-  h_ll = waterheight(u_ll, equations)
-  h_rr = waterheight(u_rr, equations)
-  c_ll = sqrt(equations.gravity * h_ll)
-  c_rr = sqrt(equations.gravity * h_rr)
-
-  return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr)
-end
+@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer,
+                                     equations::ShallowWaterEquations1D)
+    # Get the velocity quantities
+    v_ll = velocity(u_ll, equations)
+    v_rr = velocity(u_rr, equations)
+    # Calculate the wave celerity on the left and right
+    h_ll = waterheight(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    c_ll = sqrt(equations.gravity * h_ll)
+    c_rr = sqrt(equations.gravity * h_rr)
+
+    return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr)
+end
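This naive estimate is what one would typically hand to the local Lax-Friedrichs dissipation; a hedged usage sketch (same assumptions as the example above, using Trixi's exported `FluxLaxFriedrichs` wrapper):

# Sketch: the two-wave estimate feeding LLF-type dissipation
using Trixi
equations = ShallowWaterEquations1D(gravity_constant = 9.81)
u_ll = prim2cons(SVector(2.0, 0.1, 0.0), equations)
u_rr = prim2cons(SVector(1.5, -0.2, 0.0), equations)
# λ = max(|v_ll|, |v_rr|) + max(c_ll, c_rr) with celerity c = sqrt(g * h)
λ = max_abs_speed_naive(u_ll, u_rr, 1, equations)
surface_flux = FluxLaxFriedrichs(max_abs_speed_naive)  # the usual LLF combination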
 # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom topography
-@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction,
+@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr,
+                                                              orientation_or_normal_direction,
                                                               equations::ShallowWaterEquations1D)
-  λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations)
-  diss = -0.5 * λ * (u_rr - u_ll)
-  return SVector(diss[1], diss[2], zero(eltype(u_ll)))
+    λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction,
+                                  equations)
+    diss = -0.5 * λ * (u_rr - u_ll)
+    return SVector(diss[1], diss[2], zero(eltype(u_ll)))
 end
-
 # Specialized `FluxHLL` to avoid spurious dissipation in the bottom topography
 @inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction,
                                     equations::ShallowWaterEquations1D)
-  λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, equations)
-
-  if λ_min >= 0 && λ_max >= 0
-    return flux(u_ll, orientation_or_normal_direction, equations)
-  elseif λ_max <= 0 && λ_min <= 0
-    return flux(u_rr, orientation_or_normal_direction, equations)
-  else
-    f_ll = flux(u_ll, orientation_or_normal_direction, equations)
-    f_rr = flux(u_rr, orientation_or_normal_direction, equations)
-    inv_λ_max_minus_λ_min = inv(λ_max - λ_min)
-    factor_ll = λ_max * inv_λ_max_minus_λ_min
-    factor_rr = λ_min * inv_λ_max_minus_λ_min
-    factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min
-    diss = u_rr - u_ll
-    return factor_ll * f_ll - factor_rr * f_rr + factor_diss * SVector(diss[1], diss[2], zero(eltype(u_ll)))
-  end
+    λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction,
+                                         equations)
+
+    if λ_min >= 0 && λ_max >= 0
+        return flux(u_ll, orientation_or_normal_direction, equations)
+    elseif λ_max <= 0 && λ_min <= 0
+        return flux(u_rr, orientation_or_normal_direction, equations)
+    else
+        f_ll = flux(u_ll, orientation_or_normal_direction, equations)
+        f_rr = flux(u_rr, orientation_or_normal_direction, equations)
+        inv_λ_max_minus_λ_min = inv(λ_max - λ_min)
+        factor_ll = λ_max * inv_λ_max_minus_λ_min
+        factor_rr = λ_min * inv_λ_max_minus_λ_min
+        factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min
+        diss = u_rr - u_ll
+        return factor_ll * f_ll - factor_rr * f_rr +
+               factor_diss * SVector(diss[1], diss[2], zero(eltype(u_ll)))
+    end
 end
-
 # Calculate minimum and maximum wave speeds for HLL-type fluxes
 @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer,
                                      equations::ShallowWaterEquations1D)
-  h_ll = waterheight(u_ll, equations)
-  v_ll = velocity(u_ll, equations)
-  h_rr = waterheight(u_rr, equations)
-  v_rr = velocity(u_rr, equations)
+    h_ll = waterheight(u_ll, equations)
+    v_ll = velocity(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    v_rr = velocity(u_rr, equations)

-  λ_min = v_ll - sqrt(equations.gravity * h_ll)
-  λ_max = v_rr + sqrt(equations.gravity * h_rr)
+    λ_min = v_ll - sqrt(equations.gravity * h_ll)
+    λ_max = v_rr + sqrt(equations.gravity * h_rr)

-  return λ_min, λ_max
+    return λ_min, λ_max
 end
-
 @inline function max_abs_speeds(u, equations::ShallowWaterEquations1D)
-  h = waterheight(u, equations)
-  v = velocity(u, equations)
+    h = waterheight(u, equations)
+    v = velocity(u, equations)

-  c = equations.gravity * sqrt(h)
-  return (abs(v) + c,)
+    c = sqrt(equations.gravity * h)
+    return (abs(v) + c,)
 end
-
 # Helper function to extract the velocity vector from the conservative variables
 @inline function velocity(u, equations::ShallowWaterEquations1D)
-  h, h_v, _ = u
+    h, h_v, _ = u

-  v = h_v / h
+    v = h_v / h

-  return v
+    return v
 end
-
 # Convert conservative variables to primitive
 @inline function cons2prim(u, equations::ShallowWaterEquations1D)
-  h, _, b = u
+    h, _, b = u

-  H = h + b
-  v = velocity(u, equations)
-  return SVector(H, v, b)
+    H = h + b
+    v = velocity(u, equations)
+    return SVector(H, v, b)
 end
-
 # Convert conservative variables to entropy
 # Note, only the first two are the entropy variables, the third entry still
 # just carries the bottom topography values for convenience
 @inline function cons2entropy(u, equations::ShallowWaterEquations1D)
-  h, h_v, b = u
+    h, h_v, b = u

-  v = velocity(u, equations)
+    v = velocity(u, equations)

-  w1 = equations.gravity * (h + b) - 0.5 * v^2
-  w2 = v
+    w1 = equations.gravity * (h + b) - 0.5 * v^2
+    w2 = v

-  return SVector(w1, w2, b)
+    return SVector(w1, w2, b)
 end
-
 # Convert entropy variables to conservative
 @inline function entropy2cons(w, equations::ShallowWaterEquations1D)
-  w1, w2, b = w
+    w1, w2, b = w

-  h = (w1 + 0.5 * w2^2) / equations.gravity - b
-  h_v = h * w2
-  return SVector(h, h_v, b)
+    h = (w1 + 0.5 * w2^2) / equations.gravity - b
+    h_v = h * w2
+    return SVector(h, h_v, b)
 end
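Away from dry states these two conversions are inverses of each other, which can be spot-checked as follows (a sketch under the same assumptions as the previous examples, not part of the patch):

# Round-trip sketch: cons2entropy followed by entropy2cons recovers the state
using Trixi
equations = ShallowWaterEquations1D(gravity_constant = 9.81)
u = prim2cons(SVector(3.0, -0.7, 0.5), equations)  # H = 3, v = -0.7, b = 0.5
w = cons2entropy(u, equations)                     # entropy variables (w1, w2, b)
@assert entropy2cons(w, equations) ≈ u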
-
 # Convert primitive to conservative variables
 @inline function prim2cons(prim, equations::ShallowWaterEquations1D)
-  H, v, b = prim
+    H, v, b = prim

-  h = H - b
-  h_v = h * v
+    h = H - b
+    h_v = h * v

-  return SVector(h, h_v, b)
+    return SVector(h, h_v, b)
 end
-
 @inline function waterheight(u, equations::ShallowWaterEquations1D)
-  return u[1]
+    return u[1]
 end
-
 @inline function pressure(u, equations::ShallowWaterEquations1D)
-  h = waterheight(u, equations)
-  p = 0.5 * equations.gravity * h^2
-  return p
+    h = waterheight(u, equations)
+    p = 0.5 * equations.gravity * h^2
+    return p
 end
-
 @inline function waterheight_pressure(u, equations::ShallowWaterEquations1D)
-  return waterheight(u, equations) * pressure(u, equations)
+    return waterheight(u, equations) * pressure(u, equations)
 end
-
 # Entropy function for the shallow water equations is the total energy
-@inline entropy(cons, equations::ShallowWaterEquations1D) = energy_total(cons, equations)
-
+@inline function entropy(cons, equations::ShallowWaterEquations1D)
+    energy_total(cons, equations)
+end
 # Calculate total energy for a conservative state `cons`
 @inline function energy_total(cons, equations::ShallowWaterEquations1D)
-  h, h_v, b = cons
+    h, h_v, b = cons

-  e = (h_v^2) / (2 * h) + 0.5 * equations.gravity * h^2 + equations.gravity * h * b
-  return e
+    e = (h_v^2) / (2 * h) + 0.5 * equations.gravity * h^2 + equations.gravity * h * b
+    return e
 end
-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(u, equations::ShallowWaterEquations1D)
-  h, h_v, _ = u
-  return (h_v^2) / (2 * h)
+    h, h_v, _ = u
+    return (h_v^2) / (2 * h)
 end
-
 # Calculate potential energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::ShallowWaterEquations1D)
-  return energy_total(cons, equations) - energy_kinetic(cons, equations)
+    return energy_total(cons, equations) - energy_kinetic(cons, equations)
 end
-
 # Calculate the error for the "lake-at-rest" test case where H = h+b should
 # be a constant value over time
 @inline function lake_at_rest_error(u, equations::ShallowWaterEquations1D)
-  h, _, b = u
-  return abs(equations.H0 - (h + b))
+    h, _, b = u
+    return abs(equations.H0 - (h + b))
 end
-
 end # @muladd
diff --git a/src/equations/shallow_water_2d.jl b/src/equations/shallow_water_2d.jl
index b07fbfc739e..f9ebbd597f9 100644
--- a/src/equations/shallow_water_2d.jl
+++ b/src/equations/shallow_water_2d.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent
 @doc raw"""
     ShallowWaterEquations2D(gravity, H0)
@@ -47,27 +47,25 @@ References for the SWE are many but a good introduction is available in Chapter
   Finite Volume Methods for Hyperbolic Problems
   [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253)
"""
-struct ShallowWaterEquations2D{RealT<:Real} <: AbstractShallowWaterEquations{2, 4}
-  gravity::RealT # gravitational constant
-  H0::RealT # constant "lake-at-rest" total water height
+struct ShallowWaterEquations2D{RealT <: Real} <: AbstractShallowWaterEquations{2, 4}
+    gravity::RealT # gravitational constant
+    H0::RealT # constant "lake-at-rest" total water height
 end

 # Allow for flexibility to set the gravitational constant within an elixir depending on the
 # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values.
 # The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest"
 # well-balancedness test cases
-function ShallowWaterEquations2D(; gravity_constant, H0=0.0)
-  ShallowWaterEquations2D(gravity_constant, H0)
+function ShallowWaterEquations2D(; gravity_constant, H0 = 0.0)
+    ShallowWaterEquations2D(gravity_constant, H0)
 end
-
 have_nonconservative_terms(::ShallowWaterEquations2D) = True()
 varnames(::typeof(cons2cons), ::ShallowWaterEquations2D) = ("h", "h_v1", "h_v2", "b")
 # Note, we use the total water height, H = h + b, as the first primitive variable for easier
 # visualization and setting initial conditions
 varnames(::typeof(cons2prim), ::ShallowWaterEquations2D) = ("H", "v1", "v2", "b")
-
 # Set initial conditions at physical location `x` for time `t`
 """
     initial_condition_convergence_test(x, t, equations::ShallowWaterEquations2D)
@@ -77,18 +75,18 @@ A smooth initial condition used for convergence tests in combination with (and
 [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains).
 """
 function initial_condition_convergence_test(x, t, equations::ShallowWaterEquations2D)
-  # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2
-  c = 7.0
-  omega_x = 2.0 * pi * sqrt(2.0)
-  omega_t = 2.0 * pi
+    # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2
+    c = 7.0
+    omega_x = 2.0 * pi * sqrt(2.0)
+    omega_t = 2.0 * pi

-  x1, x2 = x
+    x1, x2 = x

-  H = c + cos(omega_x * x1) * sin(omega_x * x2) * cos(omega_t * t)
-  v1 = 0.5
-  v2 = 1.5
-  b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2)
-  return prim2cons(SVector(H, v1, v2, b), equations)
+    H = c + cos(omega_x * x1) * sin(omega_x * x2) * cos(omega_t * t)
+    v1 = 0.5
+    v2 = 1.5
+    b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2)
+    return prim2cons(SVector(H, v1, v2, b), equations)
 end
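For orientation, evaluating this manufactured solution at a sample point returns conservative variables via `prim2cons` (a sketch, not part of the patch; assumptions as in the earlier examples):

# Sketch: sampling the 2D convergence-test initial condition
using Trixi
equations = ShallowWaterEquations2D(gravity_constant = 9.81)
u0 = initial_condition_convergence_test(SVector(0.1, 0.2), 0.0, equations)
H, v1, v2, b = cons2prim(u0, equations)  # recovers v1 == 0.5 and v2 == 1.5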
 """
     source_terms_convergence_test(u, x, t, equations::ShallowWaterEquations2D)
@@ -102,42 +100,42 @@ This manufactured solution source term is specifically designed for the bottom t
 `b(x,y) = 2 + 0.5 * sin(sqrt(2)*pi*x) + 0.5 * sin(sqrt(2)*pi*y)`
 as defined in [`initial_condition_convergence_test`](@ref).
 """
-@inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterEquations2D)
-  # Same settings as in `initial_condition_convergence_test`. Some derivative simplify because
-  # this manufactured solution velocities are taken to be constants
-  c = 7.0
-  omega_x = 2.0 * pi * sqrt(2.0)
-  omega_t = 2.0 * pi
-  omega_b = sqrt(2.0) * pi
-  v1 = 0.5
-  v2 = 1.5
-
-  x1, x2 = x
-
-  sinX, cosX = sincos(omega_x * x1)
-  sinY, cosY = sincos(omega_x * x2)
-  sinT, cosT = sincos(omega_t * t )
-
-  H = c + cosX * sinY * cosT
-  H_x = -omega_x * sinX * sinY * cosT
-  H_y = omega_x * cosX * cosY * cosT
-  # this time derivative for the water height exploits that the bottom topography is
-  # fixed in time such that H_t = (h+b)_t = h_t + 0
-  H_t = -omega_t * cosX * sinY * sinT
-
-  # bottom topography and its gradient
-  b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2)
-  tmp1 = 0.5 * omega_b
-  b_x = tmp1 * cos(omega_b * x1)
-  b_y = tmp1 * cos(omega_b * x2)
-
-  du1 = H_t + v1 * (H_x - b_x) + v2 * (H_y - b_y)
-  du2 = v1 * du1 + equations.gravity * (H - b) * H_x
-  du3 = v2 * du1 + equations.gravity * (H - b) * H_y
-  return SVector(du1, du2, du3, 0.0)
+@inline function source_terms_convergence_test(u, x, t,
+                                               equations::ShallowWaterEquations2D)
+    # Same settings as in `initial_condition_convergence_test`. Some derivatives simplify because
+    # the manufactured solution velocities are taken to be constants
+    c = 7.0
+    omega_x = 2.0 * pi * sqrt(2.0)
+    omega_t = 2.0 * pi
+    omega_b = sqrt(2.0) * pi
+    v1 = 0.5
+    v2 = 1.5
+
+    x1, x2 = x
+
+    sinX, cosX = sincos(omega_x * x1)
+    sinY, cosY = sincos(omega_x * x2)
+    sinT, cosT = sincos(omega_t * t)
+
+    H = c + cosX * sinY * cosT
+    H_x = -omega_x * sinX * sinY * cosT
+    H_y = omega_x * cosX * cosY * cosT
+    # this time derivative for the water height exploits that the bottom topography is
+    # fixed in time such that H_t = (h+b)_t = h_t + 0
+    H_t = -omega_t * cosX * sinY * sinT
+
+    # bottom topography and its gradient
+    b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2)
+    tmp1 = 0.5 * omega_b
+    b_x = tmp1 * cos(omega_b * x1)
+    b_y = tmp1 * cos(omega_b * x2)
+
+    du1 = H_t + v1 * (H_x - b_x) + v2 * (H_y - b_y)
+    du2 = v1 * du1 + equations.gravity * (H - b) * H_x
+    du3 = v2 * du1 + equations.gravity * (H - b) * H_y
+    return SVector(du1, du2, du3, 0.0)
 end
-
 """
     initial_condition_weak_blast_wave(x, t, equations::ShallowWaterEquations2D)
@@ -145,24 +143,23 @@ A weak blast wave discontinuity useful for testing, e.g., total energy conservat
 Note that for the shallow water equations the total energy acts as a mathematical entropy function.
 """
 function initial_condition_weak_blast_wave(x, t, equations::ShallowWaterEquations2D)
-  # Set up polar coordinates
-  inicenter = SVector(0.7, 0.7)
-  x_norm = x[1] - inicenter[1]
-  y_norm = x[2] - inicenter[2]
-  r = sqrt(x_norm^2 + y_norm^2)
-  phi = atan(y_norm, x_norm)
-  sin_phi, cos_phi = sincos(phi)
+    # Set up polar coordinates
+    inicenter = SVector(0.7, 0.7)
+    x_norm = x[1] - inicenter[1]
+    y_norm = x[2] - inicenter[2]
+    r = sqrt(x_norm^2 + y_norm^2)
+    phi = atan(y_norm, x_norm)
+    sin_phi, cos_phi = sincos(phi)

-  # Calculate primitive variables
-  H = r > 0.5 ? 3.25 : 4.0
-  v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi
-  v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi
-  b = 0.0 # by default assume there is no bottom topography
+    # Calculate primitive variables
+    H = r > 0.5 ? 3.25 : 4.0
+    v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi
+    v2 = r > 0.5 ?
0.0 : 0.1882 * sin_phi + b = 0.0 # by default assume there is no bottom topography - return prim2cons(SVector(H, v1, v2, b), equations) + return prim2cons(SVector(H, v1, v2, b), equations) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::ShallowWaterEquations2D) @@ -179,25 +176,24 @@ For details see Section 9.2.5 of the book: x, t, surface_flux_function, equations::ShallowWaterEquations2D) - # normalize the outward pointing direction - normal = normal_direction / norm(normal_direction) + # normalize the outward pointing direction + normal = normal_direction / norm(normal_direction) - # compute the normal velocity - u_normal = normal[1] * u_inner[2] + normal[2] * u_inner[3] + # compute the normal velocity + u_normal = normal[1] * u_inner[2] + normal[2] * u_inner[3] - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - u_inner[2] - 2.0 * u_normal * normal[1], - u_inner[3] - 2.0 * u_normal * normal[2], - u_inner[4]) + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + u_inner[2] - 2.0 * u_normal * normal[1], + u_inner[3] - 2.0 * u_normal * normal[2], + u_inner[4]) - # calculate the boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + # calculate the boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + return flux end - """ boundary_condition_slip_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::ShallowWaterEquations2D) @@ -208,56 +204,55 @@ Should be used together with [`TreeMesh`](@ref). direction, x, t, surface_flux_function, equations::ShallowWaterEquations2D) - ## get the appropriate normal vector from the orientation - if orientation == 1 - u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], u_inner[4]) - else # orientation == 2 - u_boundary = SVector(u_inner[1], u_inner[2], -u_inner[3], u_inner[4]) - end + ## get the appropriate normal vector from the orientation + if orientation == 1 + u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], u_inner[4]) + else # orientation == 2 + u_boundary = SVector(u_inner[1], u_inner[2], -u_inner[3], u_inner[4]) + end - # compute and return the flux using `boundary_condition_slip_wall` routine above - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + # compute and return the flux using `boundary_condition_slip_wall` routine above + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - return flux + return flux end # Calculate 1D flux for a single point # Note, the bottom topography has no flux @inline function flux(u, orientation::Integer, equations::ShallowWaterEquations2D) - h, h_v1, h_v2, _ = u - v1, v2 = velocity(u, equations) - - p = 0.5 * equations.gravity * h^2 - if orientation == 1 - f1 = h_v1 - f2 = h_v1 * v1 + p - f3 = h_v1 * v2 - else - f1 = h_v2 - f2 = h_v2 * v1 - f3 = h_v2 * v2 + p - end - return SVector(f1, f2, f3, zero(eltype(u))) + h, h_v1, h_v2, _ = u + v1, v2 = velocity(u, equations) + + p = 0.5 * equations.gravity * h^2 + if orientation == 1 + f1 = h_v1 + f2 = h_v1 * v1 + p + f3 = h_v1 * v2 + else + f1 = h_v2 + f2 = h_v2 * v1 + f3 = h_v2 * v2 + p + end + return SVector(f1, f2, f3, zero(eltype(u))) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized and the bottom topography has no flux -@inline function flux(u, normal_direction::AbstractVector, 
equations::ShallowWaterEquations2D) - h = waterheight(u, equations) - v1, v2 = velocity(u, equations) +@inline function flux(u, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + h = waterheight(u, equations) + v1, v2 = velocity(u, equations) - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] - h_v_normal = h * v_normal - p = 0.5 * equations.gravity * h^2 + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + h_v_normal = h * v_normal + p = 0.5 * equations.gravity * h^2 - f1 = h_v_normal - f2 = h_v_normal * v1 + p * normal_direction[1] - f3 = h_v_normal * v2 + p * normal_direction[2] - return SVector(f1, f2, f3, zero(eltype(u))) + f1 = h_v_normal + f2 = h_v_normal * v1 + p * normal_direction[1] + f3 = h_v_normal * v2 + p * normal_direction[2] + return SVector(f1, f2, f3, zero(eltype(u))) end - """ flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) @@ -282,36 +277,35 @@ Further details are available in the paper: """ @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll = waterheight(u_ll, equations) - b_rr = u_rr[4] + # Pull the necessary left and right state information + h_ll = waterheight(u_ll, equations) + b_rr = u_rr[4] - z = zero(eltype(u_ll)) - # Bottom gradient nonconservative term: (0, g h b_x, g h b_y, 0) - if orientation == 1 - f = SVector(z, equations.gravity * h_ll * b_rr, z, z) - else # orientation == 2 - f = SVector(z, z, equations.gravity * h_ll * b_rr, z) - end - return f + z = zero(eltype(u_ll)) + # Bottom gradient nonconservative term: (0, g h b_x, g h b_y, 0) + if orientation == 1 + f = SVector(z, equations.gravity * h_ll * b_rr, z, z) + else # orientation == 2 + f = SVector(z, z, equations.gravity * h_ll * b_rr, z) + end + return f end @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll = waterheight(u_ll, equations) - b_rr = u_rr[4] - # Note this routine only uses the `normal_direction_average` and the average of the - # bottom topography to get a quadratic split form DG gradient on curved elements - return SVector(zero(eltype(u_ll)), - normal_direction_average[1] * equations.gravity * h_ll * b_rr, - normal_direction_average[2] * equations.gravity * h_ll * b_rr, - zero(eltype(u_ll))) + # Pull the necessary left and right state information + h_ll = waterheight(u_ll, equations) + b_rr = u_rr[4] + # Note this routine only uses the `normal_direction_average` and the average of the + # bottom topography to get a quadratic split form DG gradient on curved elements + return SVector(zero(eltype(u_ll)), + normal_direction_average[1] * equations.gravity * h_ll * b_rr, + normal_direction_average[2] * equations.gravity * h_ll * b_rr, + zero(eltype(u_ll))) end - """ flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) @@ -346,61 +340,62 @@ and for curvilinear 2D case in the paper: """ @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll, _, _, b_ll = u_ll - h_rr, _, _, b_rr = u_rr - - h_average = 0.5 * (h_ll + h_rr) - b_jump = b_rr - b_ll - - # Includes two parts: - # (i) 
Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry - z = zero(eltype(u_ll)) - if orientation == 1 - f = SVector(z, - equations.gravity * h_ll * b_ll + equations.gravity * h_average * b_jump, - z, z) - else # orientation == 2 - f = SVector(z, z, - equations.gravity * h_ll * b_ll + equations.gravity * h_average * b_jump, - z) - end - - return f + # Pull the necessary left and right state information + h_ll, _, _, b_ll = u_ll + h_rr, _, _, b_rr = u_rr + + h_average = 0.5 * (h_ll + h_rr) + b_jump = b_rr - b_ll + + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry + z = zero(eltype(u_ll)) + if orientation == 1 + f = SVector(z, + equations.gravity * h_ll * b_ll + + equations.gravity * h_average * b_jump, + z, z) + else # orientation == 2 + f = SVector(z, z, + equations.gravity * h_ll * b_ll + + equations.gravity * h_average * b_jump, + z) + end + + return f end @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll, _, _, b_ll = u_ll - h_rr, _, _, b_rr = u_rr + # Pull the necessary left and right state information + h_ll, _, _, b_ll = u_ll + h_rr, _, _, b_rr = u_rr - # Comes in two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` - # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography + # Comes in two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` + # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography - f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll - f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll + f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll + f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll - # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` - # to handle discontinuous bathymetry - h_average = 0.5 * (h_ll + h_rr) - b_jump = b_rr - b_ll + # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` + # to handle discontinuous bathymetry + h_average = 0.5 * (h_ll + h_rr) + b_jump = b_rr - b_ll - f2 += normal_direction_ll[1] * equations.gravity * h_average * b_jump - f3 += normal_direction_ll[2] * equations.gravity * h_average * b_jump + f2 += normal_direction_ll[1] * equations.gravity * h_average * b_jump + f3 += normal_direction_ll[2] * equations.gravity * h_average * b_jump - # First and last equations do not have a nonconservative flux - f1 = f4 = zero(eltype(u_ll)) + # First and last equations do not have a nonconservative flux + f1 = f4 = zero(eltype(u_ll)) - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end - """ hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, orientation_or_normal_direction, equations::ShallowWaterEquations2D) @@ -415,27 +410,27 @@ Further details for the hydrostatic reconstruction and its motivation can be fou A fast and stable well-balanced scheme with hydrostatic reconstruction 
for shallow water flows
  [DOI: 10.1137/S1064827503431090](https://doi.org/10.1137/S1064827503431090)
"""
-@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations::ShallowWaterEquations2D)
-  # Unpack left and right water heights and bottom topographies
-  h_ll, _, _, b_ll = u_ll
-  h_rr, _, _, b_rr = u_rr
+@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr,
+                                                         equations::ShallowWaterEquations2D)
+    # Unpack left and right water heights and bottom topographies
+    h_ll, _, _, b_ll = u_ll
+    h_rr, _, _, b_rr = u_rr

-  # Get the velocities on either side
-  v1_ll, v2_ll = velocity(u_ll, equations)
-  v1_rr, v2_rr = velocity(u_rr, equations)
+    # Get the velocities on either side
+    v1_ll, v2_ll = velocity(u_ll, equations)
+    v1_rr, v2_rr = velocity(u_rr, equations)

-  # Compute the reconstructed water heights
-  h_ll_star = max(zero(h_ll) , h_ll + b_ll - max(b_ll, b_rr) )
-  h_rr_star = max(zero(h_rr) , h_rr + b_rr - max(b_ll, b_rr) )
+    # Compute the reconstructed water heights
+    h_ll_star = max(zero(h_ll), h_ll + b_ll - max(b_ll, b_rr))
+    h_rr_star = max(zero(h_rr), h_rr + b_rr - max(b_ll, b_rr))

-  # Create the conservative variables using the reconstruted water heights
-  u_ll_star = SVector( h_ll_star , h_ll_star * v1_ll , h_ll_star * v2_ll , b_ll )
-  u_rr_star = SVector( h_rr_star , h_rr_star * v1_rr , h_rr_star * v2_rr , b_rr )
+    # Create the conservative variables using the reconstructed water heights
+    u_ll_star = SVector(h_ll_star, h_ll_star * v1_ll, h_ll_star * v2_ll, b_ll)
+    u_rr_star = SVector(h_rr_star, h_rr_star * v1_rr, h_rr_star * v2_rr, b_rr)

-  return u_ll_star, u_rr_star
+    return u_ll_star, u_rr_star
 end
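A short worked example of the clipping behavior (a sketch with made-up numbers, not part of the patch): across a bathymetry step the reconstructed heights stay nonnegative, which is the property the well-balanced, positivity-friendly scheme relies on. In elixirs this reconstruction is typically paired as `FluxHydrostaticReconstruction(flux_lax_friedrichs, hydrostatic_reconstruction_audusse_etal)` together with `flux_nonconservative_audusse_etal`.

# Sketch: reconstruction across a bathymetry step (b_ll = 0, b_rr = 1)
using Trixi
equations = ShallowWaterEquations2D(gravity_constant = 9.81)
u_ll = prim2cons(SVector(2.0, 0.0, 0.0, 0.0), equations)  # h = 2.0 over b_ll = 0.0
u_rr = prim2cons(SVector(2.5, 0.0, 0.0, 1.0), equations)  # h = 1.5 over b_rr = 1.0
u_ll_star, u_rr_star = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations)
# h_ll_star = max(0, 2.0 + 0.0 - max(0.0, 1.0)) = 1.0; h_rr_star stays 1.5
@assert u_ll_star[1] ≈ 1.0 && u_rr_star[1] ≈ 1.5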
-
 """
     flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D)
@@ -460,67 +455,67 @@ Further details for the hydrostatic reconstruction and its motivation can be fou
 """
 @inline function flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer,
                                                    equations::ShallowWaterEquations2D)
-  # Pull the water height and bottom topography on the left
-  h_ll, _, _, b_ll = u_ll
-
-  # Create the hydrostatic reconstruction for the left solution state
-  u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations)
-
-  # Copy the reconstructed water height for easier to read code
-  h_ll_star = u_ll_star[1]
-
-  z = zero(eltype(u_ll))
-  # Includes two parts:
-  # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid
-  # cross-averaging across a discontinuous bottom topography
-  # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry
-  if orientation == 1
-    f = SVector(z,
-                equations.gravity * h_ll * b_ll + equations.gravity * ( h_ll^2 - h_ll_star^2 ),
-                z, z)
-  else # orientation == 2
-    f = SVector(z, z,
-                equations.gravity * h_ll * b_ll + equations.gravity * ( h_ll^2 - h_ll_star^2 ),
-                z)
-  end
-
-  return f
+    # Pull the water height and bottom topography on the left
+    h_ll, _, _, b_ll = u_ll
+
+    # Create the hydrostatic reconstruction for the left solution state
+    u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations)
+
+    # Copy the reconstructed water height for easier to read code
+    h_ll_star = u_ll_star[1]
+
+    z = zero(eltype(u_ll))
+    # Includes two parts:
+    # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid
+    # cross-averaging across a discontinuous bottom topography
+    # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry
+    if orientation == 1
+        f = SVector(z,
+                    equations.gravity * h_ll * b_ll +
+                    equations.gravity * (h_ll^2 - h_ll_star^2),
+                    z, z)
+    else # orientation == 2
+        f = SVector(z, z,
+                    equations.gravity * h_ll * b_ll +
+                    equations.gravity * (h_ll^2 - h_ll_star^2),
+                    z)
+    end
+
+    return f
 end

 @inline function flux_nonconservative_audusse_etal(u_ll, u_rr,
                                                    normal_direction_ll::AbstractVector,
                                                    normal_direction_average::AbstractVector,
                                                    equations::ShallowWaterEquations2D)
-  # Pull the water height and bottom topography on the left
-  h_ll, _, _, b_ll = u_ll
+    # Pull the water height and bottom topography on the left
+    h_ll, _, _, b_ll = u_ll

-  # Create the hydrostatic reconstruction for the left solution state
-  u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations)
+    # Create the hydrostatic reconstruction for the left solution state
+    u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations)

-  # Copy the reconstructed water height for easier to read code
-  h_ll_star = u_ll_star[1]
+    # Copy the reconstructed water height for easier to read code
+    h_ll_star = u_ll_star[1]

-  # Comes in two parts:
-  # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average`
-  # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography
+    # Comes in two parts:
+    # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average`
+    # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography

-  f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll
-  f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll
+    f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll
+    f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll

-  # (ii) True surface part that uses `normal_direction_ll`, `h_ll` and `h_ll_star`
-  # to handle discontinuous bathymetry
+    # (ii) True surface part that uses `normal_direction_ll`, `h_ll` and `h_ll_star`
+    # to handle discontinuous bathymetry

-  f2 += normal_direction_ll[1] * equations.gravity * ( h_ll^2 - h_ll_star^2 )
-  f3 += normal_direction_ll[2] * equations.gravity * ( h_ll^2 - h_ll_star^2 )
+    f2 += normal_direction_ll[1] * equations.gravity * (h_ll^2 - h_ll_star^2)
+    f3 += normal_direction_ll[2] * equations.gravity * (h_ll^2 - h_ll_star^2)

-  # First and last equations do not have a nonconservative flux
-  f1 = f4 = zero(eltype(u_ll))
+    # First and last equations do not have a nonconservative flux
+    f1 = f4 = zero(eltype(u_ll))

-  return SVector(f1, f2, f3, f4)
+    return SVector(f1, f2, f3, f4)
 end
-
-
 """
     flux_fjordholm_etal(u_ll, u_rr, orientation_or_normal_direction,
                         equations::ShallowWaterEquations2D)
@@ -534,59 +529,60 @@ Details are available in Eq. 
(4.1) in the paper: Well-balanced and energy stable schemes for the shallow water equations with discontinuous topography [DOI: 10.1016/j.jcp.2011.03.042](https://doi.org/10.1016/j.jcp.2011.03.042) """ -@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_avg = 0.5 * (h_ll + h_rr ) - v1_avg = 0.5 * (v1_ll + v1_rr ) - v2_avg = 0.5 * (v2_ll + v2_rr ) - p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = h_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - else - f1 = h_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - end - - return SVector(f1, f2, f3, zero(eltype(u_ll))) -end - -@inline function flux_fjordholm_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - - # Average each factor of products in flux - h_avg = 0.5 * (h_ll + h_rr ) - v1_avg = 0.5 * (v1_ll + v1_rr ) - v2_avg = 0.5 * (v2_ll + v2_rr ) - h2_avg = 0.5 * (h_ll^2 + h_rr^2) - p_avg = 0.5 * equations.gravity * h2_avg - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - - # Calculate fluxes depending on normal_direction - f1 = h_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - - return SVector(f1, f2, f3, zero(eltype(u_ll))) -end +@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_avg = 0.5 * (h_ll + h_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = h_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + else + f1 = h_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + end + + return SVector(f1, f2, f3, zero(eltype(u_ll))) +end + +@inline function flux_fjordholm_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + # Average each factor of products in flux + h_avg = 0.5 * (h_ll + h_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + h2_avg = 0.5 * (h_ll^2 + h_rr^2) + p_avg = 0.5 * equations.gravity * h2_avg + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + + # Calculate fluxes depending on normal_direction + f1 = h_avg * v_dot_n_avg + f2 = f1 * v1_avg 
+ p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + + return SVector(f1, f2, f3, zero(eltype(u_ll))) +end """ flux_wintermeyer_etal(u_ll, u_rr, orientation_or_normal_direction, @@ -602,282 +598,274 @@ Further details are available in Theorem 1 of the paper: shallow water equations on unstructured curvilinear meshes with discontinuous bathymetry [DOI: 10.1016/j.jcp.2017.03.036](https://doi.org/10.1016/j.jcp.2017.03.036) """ -@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll, h_v1_ll, h_v2_ll, _ = u_ll - h_rr, h_v1_rr, h_v2_rr, _ = u_rr - - # Get the velocities on either side - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * equations.gravity * h_ll * h_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = 0.5 * (h_v1_ll + h_v1_rr) - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - else - f1 = 0.5 * (h_v2_ll + h_v2_rr) - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - end - - return SVector(f1, f2, f3, zero(eltype(u_ll))) +@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll, h_v1_ll, h_v2_ll, _ = u_ll + h_rr, h_v1_rr, h_v2_rr, _ = u_rr + + # Get the velocities on either side + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * equations.gravity * h_ll * h_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = 0.5 * (h_v1_ll + h_v1_rr) + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + else + f1 = 0.5 * (h_v2_ll + h_v2_rr) + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + end + + return SVector(f1, f2, f3, zero(eltype(u_ll))) +end + +@inline function flux_wintermeyer_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll, h_v1_ll, h_v2_ll, _ = u_ll + h_rr, h_v1_rr, h_v2_rr, _ = u_rr + + # Get the velocities on either side + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_v1_avg = 0.5 * (h_v1_ll + h_v1_rr) + h_v2_avg = 0.5 * (h_v2_ll + h_v2_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * equations.gravity * h_ll * h_rr + + # Calculate fluxes depending on normal_direction + f1 = h_v1_avg * normal_direction[1] + h_v2_avg * normal_direction[2] + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + + return SVector(f1, f2, f3, zero(eltype(u_ll))) end -@inline function flux_wintermeyer_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll, h_v1_ll, h_v2_ll, _ = u_ll - h_rr, h_v1_rr, h_v2_rr, _ = u_rr - - # Get the velocities on either side - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_v1_avg = 0.5 * (h_v1_ll + h_v1_rr ) - h_v2_avg = 0.5 * (h_v2_ll + h_v2_rr ) - v1_avg = 0.5 * (v1_ll + v1_rr ) - v2_avg = 0.5 * (v2_ll + v2_rr ) - p_avg = 0.5 * equations.gravity * h_ll * h_rr - - # Calculate fluxes depending on 
normal_direction - f1 = h_v1_avg * normal_direction[1] + h_v2_avg * normal_direction[2] - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - - return SVector(f1, f2, f3, zero(eltype(u_ll))) -end - - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Get the velocity quantities in the appropriate direction - if orientation == 1 - v_ll, _ = velocity(u_ll, equations) - v_rr, _ = velocity(u_rr, equations) - else - _, v_ll = velocity(u_ll, equations) - _, v_rr = velocity(u_rr, equations) - end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + # Get the velocity quantities in the appropriate direction + if orientation == 1 + v_ll, _ = velocity(u_ll, equations) + v_rr, _ = velocity(u_rr, equations) + else + _, v_ll = velocity(u_ll, equations) + _, v_rr = velocity(u_rr, equations) + end - # Calculate the wave celerity on the left and right - h_ll = waterheight(u_ll, equations) - h_rr = waterheight(u_rr, equations) - c_ll = sqrt(equations.gravity * h_ll) - c_rr = sqrt(equations.gravity * h_rr) + # Calculate the wave celerity on the left and right + h_ll = waterheight(u_ll, equations) + h_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) end -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - # Extract and compute the velocities in the normal direction - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + # Extract and compute the velocities in the normal direction + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - # Compute the wave celerity on the left and right - h_ll = waterheight(u_ll, equations) - h_rr = waterheight(u_rr, equations) - c_ll = sqrt(equations.gravity * h_ll) - c_rr = sqrt(equations.gravity * h_rr) + # Compute the wave celerity on the left and right + h_ll = waterheight(u_ll, equations) + h_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) - # The normal velocities are already scaled by the norm - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) + # The normal velocities are already scaled by the norm + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom topography -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, equations::ShallowWaterEquations2D) - λ = dissipation.max_abs_speed(u_ll, u_rr, 
orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - return SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) end - # Specialized `FluxHLL` to avoid spurious dissipation in the bottom topography @inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, - equations::ShallowWaterEquations2D) - λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - - if λ_min >= 0 && λ_max >= 0 - return flux(u_ll, orientation_or_normal_direction, equations) - elseif λ_max <= 0 && λ_min <= 0 - return flux(u_rr, orientation_or_normal_direction, equations) - else - f_ll = flux(u_ll, orientation_or_normal_direction, equations) - f_rr = flux(u_rr, orientation_or_normal_direction, equations) - inv_λ_max_minus_λ_min = inv(λ_max - λ_min) - factor_ll = λ_max * inv_λ_max_minus_λ_min - factor_rr = λ_min * inv_λ_max_minus_λ_min - factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min - diss = u_rr - u_ll - return factor_ll * f_ll - factor_rr * f_rr + factor_diss * SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) - end + equations::ShallowWaterEquations2D) + λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + + if λ_min >= 0 && λ_max >= 0 + return flux(u_ll, orientation_or_normal_direction, equations) + elseif λ_max <= 0 && λ_min <= 0 + return flux(u_rr, orientation_or_normal_direction, equations) + else + f_ll = flux(u_ll, orientation_or_normal_direction, equations) + f_rr = flux(u_rr, orientation_or_normal_direction, equations) + inv_λ_max_minus_λ_min = inv(λ_max - λ_min) + factor_ll = λ_max * inv_λ_max_minus_λ_min + factor_rr = λ_min * inv_λ_max_minus_λ_min + factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min + diss = u_rr - u_ll + return factor_ll * f_ll - factor_rr * f_rr + + factor_diss * SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) + end end - # Calculate minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) - if orientation == 1 # x-direction - λ_min = v1_ll - sqrt(equations.gravity * h_ll) - λ_max = v1_rr + sqrt(equations.gravity * h_rr) - else # y-direction - λ_min = v2_ll - sqrt(equations.gravity * h_ll) - λ_max = v2_rr + sqrt(equations.gravity * h_rr) - end + if orientation == 1 # x-direction + λ_min = v1_ll - sqrt(equations.gravity * h_ll) + λ_max = v1_rr + sqrt(equations.gravity * h_rr) + else # y-direction + λ_min = v2_ll - sqrt(equations.gravity * h_ll) + λ_max = v2_rr + sqrt(equations.gravity * h_rr) + end - return λ_min, λ_max + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + 
v1_rr, v2_rr = velocity(u_rr, equations)

-  v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2]
-  v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2]
+    v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2]
+    v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2]

-  norm_ = norm(normal_direction)
-  # The v_normals are already scaled by the norm
-  λ_min = v_normal_ll - sqrt(equations.gravity * h_ll) * norm_
-  λ_max = v_normal_rr + sqrt(equations.gravity * h_rr) * norm_
+    norm_ = norm(normal_direction)
+    # The v_normals are already scaled by the norm
+    λ_min = v_normal_ll - sqrt(equations.gravity * h_ll) * norm_
+    λ_max = v_normal_rr + sqrt(equations.gravity * h_rr) * norm_

-  return λ_min, λ_max
+    return λ_min, λ_max
 end
-
 @inline function max_abs_speeds(u, equations::ShallowWaterEquations2D)
-  h = waterheight(u, equations)
-  v1, v2 = velocity(u, equations)
+    h = waterheight(u, equations)
+    v1, v2 = velocity(u, equations)

-  c = equations.gravity * sqrt(h)
-  return abs(v1) + c, abs(v2) + c
+    c = sqrt(equations.gravity * h)
+    return abs(v1) + c, abs(v2) + c
 end
-
 # Helper function to extract the velocity vector from the conservative variables
 @inline function velocity(u, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, _ = u
+    h, h_v1, h_v2, _ = u

-  v1 = h_v1 / h
-  v2 = h_v2 / h
-  return SVector(v1, v2)
+    v1 = h_v1 / h
+    v2 = h_v2 / h
+    return SVector(v1, v2)
 end
-
 # Convert conservative variables to primitive
 @inline function cons2prim(u, equations::ShallowWaterEquations2D)
-  h, _, _, b = u
+    h, _, _, b = u

-  H = h + b
-  v1, v2 = velocity(u, equations)
-  return SVector(H, v1, v2, b)
+    H = h + b
+    v1, v2 = velocity(u, equations)
+    return SVector(H, v1, v2, b)
 end
-
 # Convert conservative variables to entropy
 # Note, only the first three are the entropy variables, the fourth entry still
 # just carries the bottom topography values for convenience
 @inline function cons2entropy(u, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, b = u
+    h, h_v1, h_v2, b = u

-  v1, v2 = velocity(u, equations)
-  v_square = v1^2 + v2^2
+    v1, v2 = velocity(u, equations)
+    v_square = v1^2 + v2^2

-  w1 = equations.gravity * (h + b) - 0.5 * v_square
-  w2 = v1
-  w3 = v2
-  return SVector(w1, w2, w3, b)
+    w1 = equations.gravity * (h + b) - 0.5 * v_square
+    w2 = v1
+    w3 = v2
+    return SVector(w1, w2, w3, b)
 end
-
 # Convert entropy variables to conservative
 @inline function entropy2cons(w, equations::ShallowWaterEquations2D)
-  w1, w2, w3, b = w
+    w1, w2, w3, b = w

-  h = (w1 + 0.5 * (w2^2 + w3^2)) / equations.gravity - b
-  h_v1 = h * w2
-  h_v2 = h * w3
-  return SVector(h, h_v1, h_v2, b)
+    h = (w1 + 0.5 * (w2^2 + w3^2)) / equations.gravity - b
+    h_v1 = h * w2
+    h_v2 = h * w3
+    return SVector(h, h_v1, h_v2, b)
 end
-
 # Convert primitive to conservative variables
 @inline function prim2cons(prim, equations::ShallowWaterEquations2D)
-  H, v1, v2, b = prim
+    H, v1, v2, b = prim

-  h = H - b
-  h_v1 = h * v1
-  h_v2 = h * v2
-  return SVector(h, h_v1, h_v2, b)
+    h = H - b
+    h_v1 = h * v1
+    h_v2 = h * v2
+    return SVector(h, h_v1, h_v2, b)
 end
-
 @inline function waterheight(u, equations::ShallowWaterEquations2D)
-  return u[1]
+    return u[1]
 end
-
 @inline function pressure(u, equations::ShallowWaterEquations2D)
-  h = waterheight(u, equations)
-  p = 0.5 * equations.gravity * h^2
-  return p
+    h = waterheight(u, equations)
+    p = 0.5 * equations.gravity * h^2
+    return p
 end
-
 @inline function waterheight_pressure(u, equations::ShallowWaterEquations2D)
-  return waterheight(u, equations) * pressure(u, equations)
+    return waterheight(u, equations) * pressure(u, equations)
 end
-
 # Entropy function for the shallow water equations is the total energy
-@inline entropy(cons, equations::ShallowWaterEquations2D) = energy_total(cons, equations)
-
+@inline function entropy(cons, equations::ShallowWaterEquations2D)
+    energy_total(cons, equations)
+end
 # Calculate total energy for a conservative state `cons`
 @inline function energy_total(cons, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, b = cons
+    h, h_v1, h_v2, b = cons

-  e = (h_v1^2 + h_v2^2) / (2 * h) + 0.5 * equations.gravity * h^2 + equations.gravity * h * b
-  return e
+    e = (h_v1^2 + h_v2^2) / (2 * h) + 0.5 * equations.gravity * h^2 +
+        equations.gravity * h * b
+    return e
 end
-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(u, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, _ = u
-  return (h_v1^2 + h_v2^2) / (2 * h)
+    h, h_v1, h_v2, _ = u
+    return (h_v1^2 + h_v2^2) / (2 * h)
 end
-
 # Calculate potential energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::ShallowWaterEquations2D)
-  return energy_total(cons, equations) - energy_kinetic(cons, equations)
+    return energy_total(cons, equations) - energy_kinetic(cons, equations)
 end
-
 # Calculate the error for the "lake-at-rest" test case where H = h+b should
 # be a constant value over time
 @inline function lake_at_rest_error(u, equations::ShallowWaterEquations2D)
-  h, _, _, b = u
-  return abs(equations.H0 - (h + b))
+    h, _, _, b = u
+    return abs(equations.H0 - (h + b))
 end
-
 end # @muladd
diff --git a/src/equations/shallow_water_two_layer_1d.jl b/src/equations/shallow_water_two_layer_1d.jl
index fd4fbc017ec..edf7d5e32ff 100644
--- a/src/equations/shallow_water_two_layer_1d.jl
+++ b/src/equations/shallow_water_two_layer_1d.jl
@@ -3,6 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
+#! format: noindent
 @doc raw"""
     ShallowWaterTwoLayerEquations1D(gravity, H0, rho_upper, rho_lower)
@@ -57,37 +58,43 @@ A good introduction for the 2LSWE is available in Chapter 12 of the book: \
   ISBN: 978-0-12-088759-0
"""
-struct ShallowWaterTwoLayerEquations1D{RealT<:Real} <: AbstractShallowWaterEquations{1,5}
-  gravity::RealT # gravitational constant
-  H0::RealT # constant "lake-at-rest" total water height
-  rho_upper::RealT # lower layer density
-  rho_lower::RealT # upper layer density
-  r::RealT # ratio of rho_upper / rho_lower
+struct ShallowWaterTwoLayerEquations1D{RealT <: Real} <:
+       AbstractShallowWaterEquations{1, 5}
+    gravity::RealT # gravitational constant
+    H0::RealT # constant "lake-at-rest" total water height
+    rho_upper::RealT # upper layer density
+    rho_lower::RealT # lower layer density
+    r::RealT # ratio of rho_upper / rho_lower
 end

 # Allow for flexibility to set the gravitational constant within an elixir depending on the
 # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values.
 # The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest"
 # well-balancedness test cases. Densities must be specified such that rho_upper <= rho_lower.
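A construction sketch (not part of the patch) showing the keyword interface described in the comment above; the density check in the constructor below rejects `rho_upper > rho_lower`:

# Sketch: building the two-layer system with a valid density ordering
using Trixi
equations = ShallowWaterTwoLayerEquations1D(gravity_constant = 9.81,
                                            rho_upper = 0.9, rho_lower = 1.0)
# equations.r == 0.9, the density ratio rho_upper / rho_lower used by the fluxes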
-function ShallowWaterTwoLayerEquations1D(; gravity_constant, H0=zero(gravity_constant), rho_upper, rho_lower) - # Assign density ratio if rho_upper <= rho_lower - if rho_upper > rho_lower - error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower") - else - r = rho_upper / rho_lower - end - ShallowWaterTwoLayerEquations1D(gravity_constant, H0, rho_upper, rho_lower, r) +function ShallowWaterTwoLayerEquations1D(; gravity_constant, + H0 = zero(gravity_constant), rho_upper, + rho_lower) + # Assign density ratio if rho_upper <= rho_lower + if rho_upper > rho_lower + error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower") + else + r = rho_upper / rho_lower + end + ShallowWaterTwoLayerEquations1D(gravity_constant, H0, rho_upper, rho_lower, r) end have_nonconservative_terms(::ShallowWaterTwoLayerEquations1D) = True() -varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations1D) = ("h_upper", "h_v1_upper", - "h_lower", "h_v1_lower", "b") +function varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations1D) + ("h_upper", "h_v1_upper", + "h_lower", "h_v1_lower", "b") +end # Note, we use the total water height, H_lower = h_upper + h_lower + b, and first layer total height # H_upper = h_upper + b as the first primitive variable for easier visualization and setting initial # conditions -varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations1D) = ("H_upper", "v1_upper", - "H_lower", "v1_lower", "b") - +function varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations1D) + ("H_upper", "v1_upper", + "H_lower", "v1_lower", "b") +end # Set initial conditions at physical location `x` for time `t` """ @@ -97,20 +104,20 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equations::ShallowWaterTwoLayerEquations1D) - # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)] - ω = 2.0 * pi * sqrt(2.0) - - H_lower = 2.0 + 0.1 * sin(ω * x[1] + t) - H_upper = 4.0 + 0.1 * cos(ω * x[1] + t) - v1_lower = 1.0 - v1_upper = 0.9 - b = 1.0 + 0.1 * cos(2.0 * ω * x[1]) - - return prim2cons(SVector(H_upper, v1_upper, H_lower, v1_lower, b), equations) +function initial_condition_convergence_test(x, t, + equations::ShallowWaterTwoLayerEquations1D) + # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)] + ω = 2.0 * pi * sqrt(2.0) + + H_lower = 2.0 + 0.1 * sin(ω * x[1] + t) + H_upper = 4.0 + 0.1 * cos(ω * x[1] + t) + v1_lower = 1.0 + v1_upper = 0.9 + b = 1.0 + 0.1 * cos(2.0 * ω * x[1]) + + return prim2cons(SVector(H_upper, v1_upper, H_lower, v1_lower, b), equations) end - """ source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations1D) @@ -121,26 +128,35 @@ in non-periodic domains). """ @inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations1D) - # Same settings as in `initial_condition_convergence_test`. 
Some derivative simplify because - # this manufactured solution velocity is taken to be constant - ω = 2 * pi * sqrt(2.0) - - du1 = (-0.1*cos(t + ω*x[1]) - 0.1*sin(t + ω*x[1]) - 0.09*ω*cos(t + ω*x[1]) + - - 0.09*ω*sin(t + ω*x[1])) - du2 = (5.0 * (-0.1*ω*cos(t + ω*x[1]) - 0.1*ω*sin(t + ω*x[1])) * (4.0 + 0.2*cos(t + ω*x[1]) + - -0.2*sin(t + ω*x[1])) + 0.1*ω*(20.0 + cos(t + ω*x[1]) - sin(t + ω*x[1])) * cos(t + - ω*x[1]) - 0.09*cos(t + ω*x[1]) - 0.09*sin(t + ω*x[1]) - 0.081*ω*cos(t + ω*x[1]) + - -0.081*ω*sin(t + ω*x[1])) - du3 = 0.1*cos(t + ω*x[1]) + 0.1*ω*cos(t + ω*x[1]) + 0.2*ω*sin(2.0*ω*x[1]) - du4 = ((10.0 + sin(t + ω*x[1]) - cos(2ω*x[1]))*(-0.09*ω*cos(t + ω*x[1]) - 0.09*ω*sin(t + - ω*x[1]) - 0.2*ω*sin(2*ω*x[1])) + 0.1*cos(t + ω*x[1]) + 0.1*ω*cos(t + ω*x[1]) + - 5.0 * (0.1*ω*cos(t + ω*x[1]) + 0.2*ω*sin(2.0*ω*x[1])) * (2.0 + 0.2*sin(t + ω*x[1]) + - -0.2*cos(2.0*ω*x[1])) + 0.2*ω*sin(2.0*ω*x[1])) - - return SVector(du1, du2, du3, du4, zero(eltype(u))) + # Same settings as in `initial_condition_convergence_test`. Some derivative simplify because + # this manufactured solution velocity is taken to be constant + ω = 2 * pi * sqrt(2.0) + + du1 = (-0.1 * cos(t + ω * x[1]) - 0.1 * sin(t + ω * x[1]) - + 0.09 * ω * cos(t + ω * x[1]) + + -0.09 * ω * sin(t + ω * x[1])) + du2 = (5.0 * (-0.1 * ω * cos(t + ω * x[1]) - 0.1 * ω * sin(t + ω * x[1])) * + (4.0 + 0.2 * cos(t + ω * x[1]) + + -0.2 * sin(t + ω * x[1])) + + 0.1 * ω * (20.0 + cos(t + ω * x[1]) - sin(t + ω * x[1])) * + cos(t + + ω * x[1]) - 0.09 * cos(t + ω * x[1]) - 0.09 * sin(t + ω * x[1]) - + 0.081 * ω * cos(t + ω * x[1]) + + -0.081 * ω * sin(t + ω * x[1])) + du3 = 0.1 * cos(t + ω * x[1]) + 0.1 * ω * cos(t + ω * x[1]) + + 0.2 * ω * sin(2.0 * ω * x[1]) + du4 = ((10.0 + sin(t + ω * x[1]) - cos(2ω * x[1])) * + (-0.09 * ω * cos(t + ω * x[1]) - 0.09 * ω * sin(t + + ω * x[1]) - + 0.2 * ω * sin(2 * ω * x[1])) + 0.1 * cos(t + ω * x[1]) + + 0.1 * ω * cos(t + ω * x[1]) + + 5.0 * (0.1 * ω * cos(t + ω * x[1]) + 0.2 * ω * sin(2.0 * ω * x[1])) * + (2.0 + 0.2 * sin(t + ω * x[1]) + + -0.2 * cos(2.0 * ω * x[1])) + 0.2 * ω * sin(2.0 * ω * x[1])) + + return SVector(du1, du2, du3, du4, zero(eltype(u))) end - """ boundary_condition_slip_wall(u_inner, orientation_or_normal, x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations1D) @@ -158,43 +174,42 @@ For details see Section 9.2.5 of the book: @inline function boundary_condition_slip_wall(u_inner, orientation_or_normal, direction, x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations1D) - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - -u_inner[2], - u_inner[3], - -u_inner[4], - u_inner[5]) - - # calculate the boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - f = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - f = surface_flux_function(u_boundary, u_inner, orientation_or_normal, equations) - end - return f + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + -u_inner[2], + u_inner[3], + -u_inner[4], + u_inner[5]) + + # calculate the boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + f = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + f = surface_flux_function(u_boundary, u_inner, orientation_or_normal, 
equations) + end + return f end - # Calculate 1D flux for a single point # Note, the bottom topography has no flux -@inline function flux(u, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - - # Calculate velocities - v1_upper, v1_lower = velocity(u, equations) - # Calculate pressure - p1 = 0.5 * equations.gravity * h_upper^2 - p2 = 0.5 * equations.gravity * h_lower^2 - - f1 = h_v1_upper - f2 = h_v1_upper * v1_upper + p1 - f3 = h_v2_lower - f4 = h_v2_lower * v1_lower + p2 - - return SVector(f1, f2, f3, f4, zero(eltype(u))) +@inline function flux(u, orientation::Integer, + equations::ShallowWaterTwoLayerEquations1D) + h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u + + # Calculate velocities + v1_upper, v1_lower = velocity(u, equations) + # Calculate pressure + p1 = 0.5 * equations.gravity * h_upper^2 + p2 = 0.5 * equations.gravity * h_lower^2 + + f1 = h_v1_upper + f2 = h_v1_upper * v1_upper + p1 + f3 = h_v2_lower + f4 = h_v2_lower * v1_lower + p2 + + return SVector(f1, f2, f3, f4, zero(eltype(u))) end - """ flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) @@ -216,24 +231,23 @@ Further details are available in the paper: @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Pull the necessary left and right state information - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - b_rr = u_rr[5] - - z = zero(eltype(u_ll)) - - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, - # 0, g*h_lower*(b+r*h_upper)_x, 0) - f = SVector(z, - equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - z, - equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), - z) - return f + # Pull the necessary left and right state information + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + b_rr = u_rr[5] + + z = zero(eltype(u_ll)) + + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, + # 0, g*h_lower*(b+r*h_upper)_x, 0) + f = SVector(z, + equations.gravity * h_upper_ll * (b_rr + h_lower_rr), + z, + equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), + z) + return f end - """ flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) @@ -256,35 +270,35 @@ formulation. 
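The implementation follows below. As a quick sanity sketch of the term it computes (illustrative values, assuming only the exported Trixi API): for identical left and right states all jump contributions vanish, leaving only the consistent diagonal parts g*h_upper*(b + h_lower) and g*h_lower*(b + r*h_upper):

    using Trixi

    equations = ShallowWaterTwoLayerEquations1D(gravity_constant = 1.0,
                                                rho_upper = 0.9, rho_lower = 1.0)
    # conservative state: (h_upper, h_v1_upper, h_lower, h_v1_lower, b)
    u = SVector(0.5, 0.1, 1.0, 0.2, 0.25)

    f = flux_nonconservative_fjordholm_etal(u, u, 1, equations)
    f[2] ≈ equations.gravity * u[1] * (u[5] + u[3])                # true
    f[4] ≈ equations.gravity * u[3] * (u[5] + equations.r * u[1])  # true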
@inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Pull the necessary left and right state information - h_upper_ll, _, h_lower_ll, _, b_ll = u_ll - h_upper_rr, _, h_lower_rr, _, b_rr = u_rr - - # Create average and jump values - h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) - h_upper_jump = h_upper_rr - h_upper_ll - h_lower_jump = h_lower_rr - h_lower_ll - b_jump = b_rr - b_ll - - # Assign variables for constants for better readability - g = equations.gravity - - z = zero(eltype(u_ll)) - - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, - # 0, g*h_lower*(b+r*h_upper)_x, 0) - f = SVector( - z, - g * h_upper_ll * (b_ll + h_lower_ll) + g * h_upper_average * (b_jump + h_lower_jump), - z, - g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + g * h_lower_average * (b_jump + - equations.r * h_upper_jump), - z) - return f + # Pull the necessary left and right state information + h_upper_ll, _, h_lower_ll, _, b_ll = u_ll + h_upper_rr, _, h_lower_rr, _, b_rr = u_rr + + # Create average and jump values + h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) + h_upper_jump = h_upper_rr - h_upper_ll + h_lower_jump = h_lower_rr - h_lower_ll + b_jump = b_rr - b_ll + + # Assign variables for constants for better readability + g = equations.gravity + + z = zero(eltype(u_ll)) + + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, + # 0, g*h_lower*(b+r*h_upper)_x, 0) + f = SVector(z, + g * h_upper_ll * (b_ll + h_lower_ll) + + g * h_upper_average * (b_jump + h_lower_jump), + z, + g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + + g * h_lower_average * (b_jump + + equations.r * h_upper_jump), + z) + return f end - """ flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) @@ -308,30 +322,29 @@ formulation. 
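A property worth keeping in mind when reading the implementation below: like every entropy conservative two-point flux, `flux_fjordholm_etal` is consistent, i.e. it reduces to the physical `flux` when both input states coincide, since all averages collapse to the point values. A hedged check (illustrative values, exported Trixi API assumed):

    using Trixi

    equations = ShallowWaterTwoLayerEquations1D(gravity_constant = 1.0,
                                                rho_upper = 0.9, rho_lower = 1.0)
    u = SVector(0.5, 0.1, 1.0, 0.2, 0.25)

    # p1_avg = 0.25 * g * (h^2 + h^2) = 0.5 * g * h^2, etc.
    flux_fjordholm_etal(u, u, 1, equations) ≈ flux(u, 1, equations)  # true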
@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Unpack left and right state - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) - p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) - - # Calculate fluxes - f1 = h_upper_avg * v1_avg - f2 = f1 * v1_avg + p1_avg - f3 = h_lower_avg * v2_avg - f4 = f3 * v2_avg + p2_avg - - return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) + p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) + + # Calculate fluxes + f1 = h_upper_avg * v1_avg + f2 = f1 * v1_avg + p1_avg + f3 = h_lower_avg * v2_avg + f4 = f3 * v2_avg + p2_avg + + return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) end - """ flux_wintermeyer_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) @@ -351,30 +364,29 @@ Further details are available in Theorem 1 of the paper: @inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr - - # Get the velocities on either side - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr - p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr - - # Calculate fluxes - f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) - f2 = f1 * v1_avg + p1_avg - f3 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) - f4 = f3 * v2_avg + p2_avg - - return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr + + # Get the velocities on either side + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr + p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr + + # Calculate fluxes + f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) + f2 = f1 * v1_avg + p1_avg + f3 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) + f4 = f3 * v2_avg + p2_avg + + return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) end - """ flux_es_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) @@ -394,59 +406,59 
@@ formulation. @inline function flux_es_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Compute entropy conservative flux but without the bottom topography - f_ec = flux_fjordholm_etal(u_ll, u_rr, - orientation, - equations) - - # Get maximum signal velocity - λ = max_abs_speed_naive(u_ll, u_rr, orientation, equations) - # Get entropy variables but without the bottom topography - q_rr = cons2entropy(u_rr,equations) - q_ll = cons2entropy(u_ll,equations) - - # Average values from left and right - u_avg = (u_ll + u_rr) / 2 - - # Introduce variables for better readability - rho_upper = equations.rho_upper - rho_lower = equations.rho_lower - g = equations.gravity - drho = rho_upper - rho_lower - - # Entropy Jacobian matrix - H = @SMatrix [ - [-rho_lower/(g*rho_upper*drho);; - -rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - 1.0/(g*drho);; - u_avg[4]/(g*u_avg[3]*drho);; - 0]; - [-rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - (g*rho_upper*u_avg[1]^3 - g*rho_lower*u_avg[1]^3 + - -rho_lower*u_avg[2]^2)/(g*rho_upper*u_avg[1]^2*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - u_avg[2]*u_avg[4]/(g*u_avg[1]*u_avg[3]*drho);; - 0]; - [1.0/(g*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - -1.0/(g*drho);; - -u_avg[4]/(g*u_avg[3]*drho);; - 0]; - [u_avg[4]/(g*u_avg[3]*drho);; - u_avg[2]*u_avg[4]/(g*u_avg[1]*u_avg[3]*drho);; - -u_avg[4]/(g*u_avg[3]*drho);; - (g*rho_upper*u_avg[3]^3 - g*rho_lower*u_avg[3]^3 + - -rho_lower*u_avg[4]^2)/(g*rho_lower*u_avg[3]^2*drho);; - 0]; - [0;;0;;0;;0;;0]] - - # Add dissipation to entropy conservative flux to obtain entropy stable flux - f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) - - return SVector(f_es[1], f_es[2], f_es[3], f_es[4], zero(eltype(u_ll))) + # Compute entropy conservative flux but without the bottom topography + f_ec = flux_fjordholm_etal(u_ll, u_rr, + orientation, + equations) + + # Get maximum signal velocity + λ = max_abs_speed_naive(u_ll, u_rr, orientation, equations) + # Get entropy variables but without the bottom topography + q_rr = cons2entropy(u_rr, equations) + q_ll = cons2entropy(u_ll, equations) + + # Average values from left and right + u_avg = (u_ll + u_rr) / 2 + + # Introduce variables for better readability + rho_upper = equations.rho_upper + rho_lower = equations.rho_lower + g = equations.gravity + drho = rho_upper - rho_lower + + # Compute entropy Jacobian coefficients + h11 = -rho_lower / (g * rho_upper * drho) + h12 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h13 = 1.0 / (g * drho) + h14 = u_avg[4] / (g * u_avg[3] * drho) + h21 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h22 = ((g * rho_upper * u_avg[1]^3 - g * rho_lower * u_avg[1]^3 + + -rho_lower * u_avg[2]^2) / (g * rho_upper * u_avg[1]^2 * drho)) + h23 = u_avg[2] / (g * u_avg[1] * drho) + h24 = u_avg[2] * u_avg[4] / (g * u_avg[1] * u_avg[3] * drho) + h31 = 1.0 / (g * drho) + h32 = u_avg[2] / (g * u_avg[1] * drho) + h33 = -1.0 / (g * drho) + h34 = -u_avg[4] / (g * u_avg[3] * drho) + h41 = u_avg[4] / (g * u_avg[3] * drho) + h42 = u_avg[2] * u_avg[4] / (g * u_avg[1] * u_avg[3] * drho) + h43 = -u_avg[4] / (g * u_avg[3] * drho) + h44 = ((g * rho_upper * u_avg[3]^3 - g * rho_lower * u_avg[3]^3 + + -rho_lower * u_avg[4]^2) / (g * rho_lower * u_avg[3]^2 * drho)) + + # Entropy Jacobian matrix + H = @SMatrix [[h11;; h12;; h13;; h14;; 0]; + [h21;; h22;; h23;; h24;; 0]; + [h31;; h32;; h33;; h34;; 0]; + [h41;; h42;; h43;; h44;; 0]; + [0;; 0;; 0;; 0;; 0]] + + # Add dissipation to entropy conservative flux to obtain entropy 
stable flux + f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) + + return SVector(f_es[1], f_es[2], f_es[3], f_es[4], zero(eltype(u_ll))) end - # Calculate approximation for maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound. This function uses approximate # eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them @@ -459,135 +471,130 @@ end @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr - # Get the averaged velocity - v_m_ll = (h_v1_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (h_v1_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) + # Get the averaged velocity + v_m_ll = (h_v1_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) + v_m_rr = (h_v1_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) - # Calculate the wave celerity on the left and right - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) - c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) + # Calculate the wave celerity on the left and right + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) + c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) - return (max(abs(v_m_ll) + c_ll, abs(v_m_rr) + c_rr)) + return (max(abs(v_m_ll) + c_ll, abs(v_m_rr) + c_rr)) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom # topography @inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, - orientation_or_normal_direction, equations::ShallowWaterTwoLayerEquations1D) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - return SVector(diss[1], diss[2], diss[3], diss[4], zero(eltype(u_ll))) + orientation_or_normal_direction, + equations::ShallowWaterTwoLayerEquations1D) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], diss[3], diss[4], zero(eltype(u_ll))) end - # Absolute speed of the barotropic mode @inline function max_abs_speeds(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - - # Calculate averaged velocity of both layers - v_m = (h_v1_upper + h_v2_lower) / (h_upper + h_lower) - c = sqrt(equations.gravity * (h_upper + h_lower)) + h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - return (abs(v_m) + c) -end + # Calculate averaged velocity of both layers + v_m = (h_v1_upper + h_v2_lower) / (h_upper + h_lower) + c = sqrt(equations.gravity * (h_upper + h_lower)) + return (abs(v_m) + c) +end # Helper function to extract the velocity vector from the conservative variables @inline function velocity(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u + h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - v1_upper = h_v1_upper / h_upper - v1_lower = 
h_v2_lower / h_lower - return SVector(v1_upper, v1_lower) + v1_upper = h_v1_upper / h_upper + v1_lower = h_v2_lower / h_lower + return SVector(v1_upper, v1_lower) end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, _, h_lower, _, b = u + h_upper, _, h_lower, _, b = u - H_lower = h_lower + b - H_upper = h_lower + h_upper + b - v1_upper, v1_lower = velocity(u, equations) - return SVector(H_upper, v1_upper, H_lower, v1_lower, b) + H_lower = h_lower + b + H_upper = h_lower + h_upper + b + v1_upper, v1_lower = velocity(u, equations) + return SVector(H_upper, v1_upper, H_lower, v1_lower, b) end - # Convert conservative variables to entropy variables # Note, only the first four are the entropy variables, the fifth entry still just carries the # bottom topography values for convenience @inline function cons2entropy(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, _, h_lower, _, b = u - v1_upper, v1_lower = velocity(u, equations) - - w1 = equations.rho_upper * (equations.gravity * (h_upper + h_lower + b) - 0.5 * v1_upper^2) - w2 = equations.rho_upper * v1_upper - w3 = equations.rho_lower * (equations.gravity * (equations.r * h_upper + h_lower + b) - 0.5 * v1_lower^2) - w4 = equations.rho_lower * v1_lower - return SVector(w1, w2, w3, w4, b) + h_upper, _, h_lower, _, b = u + v1_upper, v1_lower = velocity(u, equations) + + w1 = equations.rho_upper * + (equations.gravity * (h_upper + h_lower + b) - 0.5 * v1_upper^2) + w2 = equations.rho_upper * v1_upper + w3 = equations.rho_lower * + (equations.gravity * (equations.r * h_upper + h_lower + b) - 0.5 * v1_lower^2) + w4 = equations.rho_lower * v1_lower + return SVector(w1, w2, w3, w4, b) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::ShallowWaterTwoLayerEquations1D) - H_upper, v1_upper, H_lower, v1_lower, b = prim + H_upper, v1_upper, H_lower, v1_lower, b = prim - h_lower = H_lower - b - h_upper = H_upper - h_lower - b - h_v1_upper = h_upper * v1_upper - h_v2_lower = h_lower * v1_lower - return SVector(h_upper, h_v1_upper, h_lower, h_v2_lower, b) + h_lower = H_lower - b + h_upper = H_upper - h_lower - b + h_v1_upper = h_upper * v1_upper + h_v2_lower = h_lower * v1_lower + return SVector(h_upper, h_v1_upper, h_lower, h_v2_lower, b) end - @inline function waterheight(u, equations::ShallowWaterTwoLayerEquations1D) - return SVector(u[1], u[3]) + return SVector(u[1], u[3]) end - # Entropy function for the shallow water equations is the total energy -@inline entropy(cons, equations::ShallowWaterTwoLayerEquations1D) = energy_total(cons, equations) - +@inline function entropy(cons, equations::ShallowWaterTwoLayerEquations1D) + energy_total(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline function energy_total(cons, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_lower, h_v1_upper, h_v2_lower, b = cons - # Set new variables for better readability - g = equations.gravity - rho_upper = equations.rho_upper - rho_lower = equations.rho_lower - - e = (0.5 * rho_upper * (h_v1_upper^2 / h_upper + g * h_upper^2) + 0.5 * rho_lower * (h_v2_lower^2 / h_lower + g * h_lower^2) + - g * rho_lower * h_lower * b + g * rho_upper * h_upper * (h_lower + b)) - return e + h_upper, h_lower, h_v1_upper, h_v2_lower, b = cons + # Set new variables for better readability + g = equations.gravity + rho_upper = equations.rho_upper + rho_lower = equations.rho_lower + + e = (0.5 * rho_upper * (h_v1_upper^2 / 
h_upper + g * h_upper^2) + + 0.5 * rho_lower * (h_v2_lower^2 / h_lower + g * h_lower^2) + + g * rho_lower * h_lower * b + g * rho_upper * h_upper * (h_lower + b)) + return e end - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - return 0.5 * equations.rho_upper * h_v1_upper^2 / h_upper + 0.5 * equations.rho_lower * h_v2_lower^2 / h_lower + h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u + return 0.5 * equations.rho_upper * h_v1_upper^2 / h_upper + + 0.5 * equations.rho_lower * h_v2_lower^2 / h_lower end - # Calculate potential energy for a conservative state `cons` @inline function energy_internal(cons, equations::ShallowWaterTwoLayerEquations1D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - # Calculate the error for the "lake-at-rest" test case where H = h_upper+h_lower+b should # be a constant value over time @inline function lake_at_rest_error(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, _, h_lower, _, b = u - return abs(equations.H0 - (h_upper + h_lower + b)) + h_upper, _, h_lower, _, b = u + return abs(equations.H0 - (h_upper + h_lower + b)) end - end # @muladd diff --git a/src/equations/shallow_water_two_layer_2d.jl b/src/equations/shallow_water_two_layer_2d.jl index 60c389d8c4a..b5e52d636e4 100644 --- a/src/equations/shallow_water_two_layer_2d.jl +++ b/src/equations/shallow_water_two_layer_2d.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent @doc raw""" ShallowWaterTwoLayerEquations2D(gravity, H0, rho_upper, rho_lower) @@ -71,38 +72,41 @@ A good introduction for the 2LSWE is available in Chapter 12 of the book: \ ISBN: 978-0-12-088759-0 """ -struct ShallowWaterTwoLayerEquations2D{RealT<:Real} <: AbstractShallowWaterEquations{2, 7} - gravity::RealT # gravitational constant - H0::RealT # constant "lake-at-rest" total water height - rho_upper::RealT # lower layer density - rho_lower::RealT # upper layer density - r::RealT # ratio of rho_upper / rho_lower +struct ShallowWaterTwoLayerEquations2D{RealT <: Real} <: + AbstractShallowWaterEquations{2, 7} + gravity::RealT # gravitational constant + H0::RealT # constant "lake-at-rest" total water height + rho_upper::RealT # lower layer density + rho_lower::RealT # upper layer density + r::RealT # ratio of rho_upper / rho_lower end # Allow for flexibility to set the gravitational constant within an elixir depending on the # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values. # The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest" # well-balancedness test cases. Densities must be specified such that rho_upper < rho_lower. 
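Analogously to the 1D case, a short construction sketch for the 2D system (illustrative values; this assumes the 2D `prim2cons` ordering used by `initial_condition_convergence_test` below and that the 2D `lake_at_rest_error` mirrors the 1D method above):

    using Trixi

    equations = ShallowWaterTwoLayerEquations2D(gravity_constant = 10.0, H0 = 2.0,
                                                rho_upper = 0.9, rho_lower = 1.0)

    # prim order: (H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b);
    # this resting state satisfies h_upper + h_lower + b == H0
    u = prim2cons(SVector(2.0, 0.0, 0.0, 1.5, 0.0, 0.0, 0.5), equations)

    lake_at_rest_error(u, equations)  # == 0.0

The reformatted 2D constructor follows.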
-function ShallowWaterTwoLayerEquations2D(; gravity_constant, H0=zero(gravity_constant), rho_upper, rho_lower) - # Assign density ratio if rho_upper <= rho_lower - if rho_upper > rho_lower - error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower") - else - r = rho_upper / rho_lower - end - ShallowWaterTwoLayerEquations2D(gravity_constant, H0, rho_upper, rho_lower, r) +function ShallowWaterTwoLayerEquations2D(; gravity_constant, + H0 = zero(gravity_constant), rho_upper, + rho_lower) + # Assign density ratio if rho_upper <= rho_lower + if rho_upper > rho_lower + error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower") + else + r = rho_upper / rho_lower + end + ShallowWaterTwoLayerEquations2D(gravity_constant, H0, rho_upper, rho_lower, r) end - have_nonconservative_terms(::ShallowWaterTwoLayerEquations2D) = True() -varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations2D) = ( - "h_upper", "h_v1_upper", "h_v2_upper", "h_lower", "h_v1_lower", "h_v2_lower", "b") +function varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations2D) + ("h_upper", "h_v1_upper", "h_v2_upper", "h_lower", "h_v1_lower", "h_v2_lower", "b") +end # Note, we use the total water height, H_upper = h_upper + h_lower + b, and first layer total height # H_lower = h_lower + b as the first primitive variable for easier visualization and setting initial # conditions -varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations2D) = ( - "H_upper", "v1_upper", "v2_upper", "H_lower", "v1_lower", "v2_lower", "b") - +function varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations2D) + ("H_upper", "v1_upper", "v2_upper", "H_lower", "v1_lower", "v2_lower", "b") +end # Set initial conditions at physical location `x` for time `t` """ @@ -112,70 +116,111 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref). Constants must be set to ``rho_{upper} = 0.9``, ``rho_{lower} = 1.0``, ``g = 10.0``. """ -function initial_condition_convergence_test(x, t, equations::ShallowWaterTwoLayerEquations2D) - # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] - ω = 2.0 * pi * sqrt(2.0) - - H_lower = 2.0 + 0.1 * sin(ω * x[1] + t) * cos(ω * x[2] + t) - H_upper = 4.0 + 0.1 * cos(ω * x[1] + t) * sin(ω * x[2] + t) - v1_lower = 1.0 - v1_upper = 0.9 - v2_lower = 0.9 - v2_upper = 1.0 - b = 1.0 + 0.1 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) - - return prim2cons(SVector(H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b), equations) +function initial_condition_convergence_test(x, t, + equations::ShallowWaterTwoLayerEquations2D) + # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] + ω = 2.0 * pi * sqrt(2.0) + + H_lower = 2.0 + 0.1 * sin(ω * x[1] + t) * cos(ω * x[2] + t) + H_upper = 4.0 + 0.1 * cos(ω * x[1] + t) * sin(ω * x[2] + t) + v1_lower = 1.0 + v1_upper = 0.9 + v2_lower = 0.9 + v2_upper = 1.0 + b = 1.0 + 0.1 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) + + return prim2cons(SVector(H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, + b), equations) end - """ source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations2D) Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). """ -@inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations2D) - # Same settings as in `initial_condition_convergence_test`. 
- # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] - ω = 2.0 * pi * sqrt(2.0) - - # Source terms obtained with SymPy - du1 = 0.01*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.01*ω*sin(t + ω*x[1])*sin(t + ω*x[2]) - du2 = (5.0 * (-0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) - 0.1*ω*sin(t + ω*x[1])*sin(t + - ω*x[2])) * (4.0 + 0.2cos(t + ω*x[1])*sin(t + ω*x[2]) - 0.2*sin(t + ω*x[1])*cos(t + - ω*x[2])) + 0.009*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.009*ω*sin(t + ω*x[1])*sin(t + - ω*x[2]) + 0.1*ω*(20.0 + cos(t + ω*x[1])*sin(t + ω*x[2]) - sin(t + ω*x[1])*cos(t + - ω*x[2])) * cos(t + ω*x[1])*cos(t + ω*x[2])) - du3 = (5.0 * (0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.1*ω*sin(t + ω*x[1])*sin(t + - ω*x[2])) * (4.0 + 0.2*cos(t + ω*x[1])*sin(t + ω*x[2]) - 0.2*sin(t + ω*x[1])*cos(t + - ω*x[2])) + 0.01ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.01*ω*sin(t + ω*x[1])*sin(t + ω*x[2]) + - -0.1*ω*(20.0 + cos(t + ω*x[1])*sin(t + ω*x[2]) - sin(t + ω*x[1])*cos(t + ω*x[2]))*sin(t + - ω*x[1])*sin(t + ω*x[2])) - du4 = (0.1*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + - 0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2]) - 0.1*sin(t + ω*x[1])*sin(t + ω*x[2]) + - -0.045*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) - 0.09*ω*sin(t + ω*x[1])*sin(t + ω*x[2])) - du5 = ((10.0 + sin(t + ω*x[1])*cos(t + ω*x[2]) - cos(0.5*ω*x[1])*sin(0.5*ω*x[2]))*(-0.09*ω*cos(t + - ω*x[1])*cos(t + ω*x[2]) - 0.09*ω*sin(t + ω*x[1])*sin(t + ω*x[2]) + - -0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2])) + 5.0 * (0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + - 0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2])) * (2.0 + 0.2*sin(t + ω*x[1])*cos(t + ω*x[2]) + - -0.2*cos(0.5*ω*x[1])*sin(0.5*ω*x[2])) + 0.1*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.1*ω*cos(t + - ω*x[1])*cos(t + ω*x[2]) + 0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2]) - 0.1*sin(t + - ω*x[1])*sin(t + ω*x[2]) - 0.045*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) - 0.09*ω*sin(t + - ω*x[1])*sin(t + ω*x[2])) - du6 = ((10.0 + sin(t + ω*x[1])*cos(t + ω*x[2]) + - -cos(0.5*ω*x[1])*sin(0.5*ω*x[2])) * (0.05*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) + - 0.09*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.09*ω*sin(t + ω*x[1])*sin(t + ω*x[2])) + - 5.0 * (-0.05*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) - 0.1*ω*sin(t + ω*x[1])*sin(t + - ω*x[2])) * (2.0 + 0.2*sin(t + ω*x[1])*cos(t + ω*x[2]) + - -0.2*cos(0.5*ω*x[1])*sin(0.5*ω*x[2])) + 0.09cos(t + ω*x[1])*cos(t + ω*x[2]) + - 0.09*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.045*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2]) + - -0.09*sin(t + ω*x[1])*sin(t + ω*x[2]) - 0.0405*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) + - -0.081*ω*sin(t + ω*x[1])*sin(t + ω*x[2])) - - return SVector(du1, du2, du3, du4, du5, du6, zero(eltype(u))) +@inline function source_terms_convergence_test(u, x, t, + equations::ShallowWaterTwoLayerEquations2D) + # Same settings as in `initial_condition_convergence_test`. 
+ # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] + ω = 2.0 * pi * sqrt(2.0) + + # Source terms obtained with SymPy + du1 = 0.01 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.01 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2]) + du2 = (5.0 * + (-0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) - + 0.1 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2])) * + (4.0 + 0.2cos(t + ω * x[1]) * sin(t + ω * x[2]) - + 0.2 * sin(t + ω * x[1]) * cos(t + + ω * x[2])) + + 0.009 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.009 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2]) + + 0.1 * ω * + (20.0 + cos(t + ω * x[1]) * sin(t + ω * x[2]) - + sin(t + ω * x[1]) * cos(t + + ω * x[2])) * cos(t + ω * x[1]) * cos(t + ω * x[2])) + du3 = (5.0 * + (0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.1 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2])) * + (4.0 + 0.2 * cos(t + ω * x[1]) * sin(t + ω * x[2]) - + 0.2 * sin(t + ω * x[1]) * cos(t + + ω * x[2])) + + 0.01ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.01 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2]) + + -0.1 * ω * + (20.0 + cos(t + ω * x[1]) * sin(t + ω * x[2]) - + sin(t + ω * x[1]) * cos(t + ω * x[2])) * sin(t + + ω * x[1]) * sin(t + ω * x[2])) + du4 = (0.1 * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) - + 0.1 * sin(t + ω * x[1]) * sin(t + ω * x[2]) + + -0.045 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) - + 0.09 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2])) + du5 = ((10.0 + sin(t + ω * x[1]) * cos(t + ω * x[2]) - + cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) * (-0.09 * ω * cos(t + + ω * x[1]) * cos(t + ω * x[2]) - + 0.09 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2]) + + -0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) + + 5.0 * + (0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) * + (2.0 + 0.2 * sin(t + ω * x[1]) * cos(t + ω * x[2]) + + -0.2 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) + + 0.1 * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.1 * ω * cos(t + + ω * x[1]) * cos(t + ω * x[2]) + + 0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) - + 0.1 * sin(t + + ω * x[1]) * sin(t + ω * x[2]) - + 0.045 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) - + 0.09 * ω * sin(t + + ω * x[1]) * sin(t + ω * x[2])) + du6 = ((10.0 + sin(t + ω * x[1]) * cos(t + ω * x[2]) + + -cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) * + (0.05 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) + + 0.09 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.09 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2])) + + 5.0 * + (-0.05 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) - + 0.1 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2])) * + (2.0 + 0.2 * sin(t + ω * x[1]) * cos(t + ω * x[2]) + + -0.2 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) + + 0.09cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.09 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.045 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) + + -0.09 * sin(t + ω * x[1]) * sin(t + ω * x[2]) - + 0.0405 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) + + -0.081 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2])) + + return SVector(du1, du2, du3, du4, du5, du6, zero(eltype(u))) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations2D) @@ -193,85 +238,84 @@ For details see Section 9.2.5 of the book: @inline function boundary_condition_slip_wall(u_inner, normal_direction::AbstractVector, 
x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations2D) - # normalize the outward pointing direction - normal = normal_direction / norm(normal_direction) - - # compute the normal velocity - v_normal_upper = normal[1] * u_inner[2] + normal[2] * u_inner[3] - v_normal_lower = normal[1] * u_inner[5] + normal[2] * u_inner[6] - - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - u_inner[2] - 2.0 * v_normal_upper * normal[1], - u_inner[3] - 2.0 * v_normal_upper * normal[2], - u_inner[4], - u_inner[5] - 2.0 * v_normal_lower * normal[1], - u_inner[6] - 2.0 * v_normal_lower * normal[2], - u_inner[7]) - - # calculate the boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + # normalize the outward pointing direction + normal = normal_direction / norm(normal_direction) + + # compute the normal velocity + v_normal_upper = normal[1] * u_inner[2] + normal[2] * u_inner[3] + v_normal_lower = normal[1] * u_inner[5] + normal[2] * u_inner[6] + + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + u_inner[2] - 2.0 * v_normal_upper * normal[1], + u_inner[3] - 2.0 * v_normal_upper * normal[2], + u_inner[4], + u_inner[5] - 2.0 * v_normal_lower * normal[1], + u_inner[6] - 2.0 * v_normal_lower * normal[2], + u_inner[7]) + + # calculate the boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + return flux end - # Calculate 1D flux for a single point # Note, the bottom topography has no flux -@inline function flux(u, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u - - # Calculate velocities - v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) - - # Calculate pressure - p1 = 0.5 * equations.gravity * h_upper^2 - p2 = 0.5 * equations.gravity * h_lower^2 - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = h_v1_upper - f2 = h_v1_upper * v1_upper + p1 - f3 = h_v1_upper * v2_upper - f4 = h_v1_lower - f5 = h_v1_lower * v1_lower + p2 - f6 = h_v1_lower * v2_lower - else - f1 = h_v2_upper - f2 = h_v2_upper * v1_upper - f3 = h_v2_upper * v2_upper + p1 - f4 = h_v2_lower - f5 = h_v2_lower * v1_lower - f6 = h_v2_lower * v2_lower + p2 - end - return SVector(f1, f2, f3, f4, f5 , f6, zero(eltype(u))) +@inline function flux(u, orientation::Integer, + equations::ShallowWaterTwoLayerEquations2D) + h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u + + # Calculate velocities + v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) + + # Calculate pressure + p1 = 0.5 * equations.gravity * h_upper^2 + p2 = 0.5 * equations.gravity * h_lower^2 + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = h_v1_upper + f2 = h_v1_upper * v1_upper + p1 + f3 = h_v1_upper * v2_upper + f4 = h_v1_lower + f5 = h_v1_lower * v1_lower + p2 + f6 = h_v1_lower * v2_lower + else + f1 = h_v2_upper + f2 = h_v2_upper * v1_upper + f3 = h_v2_upper * v2_upper + p1 + f4 = h_v2_lower + f5 = h_v2_lower * v1_lower + f6 = h_v2_lower * v2_lower + p2 + end + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u))) end # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized and the bottom topography has no flux -@inline function flux(u, normal_direction::AbstractVector, +@inline function flux(u, normal_direction::AbstractVector, 
equations::ShallowWaterTwoLayerEquations2D) - h_upper, h_lower = waterheight(u, equations) - v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) + h_upper, h_lower = waterheight(u, equations) + v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) - v_normal_upper = v1_upper * normal_direction[1] + v2_upper * normal_direction[2] - v_normal_lower = v1_lower * normal_direction[1] + v2_lower * normal_direction[2] - h_v_upper_normal = h_upper * v_normal_upper - h_v_lower_normal = h_lower * v_normal_lower + v_normal_upper = v1_upper * normal_direction[1] + v2_upper * normal_direction[2] + v_normal_lower = v1_lower * normal_direction[1] + v2_lower * normal_direction[2] + h_v_upper_normal = h_upper * v_normal_upper + h_v_lower_normal = h_lower * v_normal_lower - p1 = 0.5 * equations.gravity * h_upper^2 - p2 = 0.5 * equations.gravity * h_lower^2 + p1 = 0.5 * equations.gravity * h_upper^2 + p2 = 0.5 * equations.gravity * h_lower^2 - f1 = h_v_upper_normal - f2 = h_v_upper_normal * v1_upper + p1 * normal_direction[1] - f3 = h_v_upper_normal * v2_upper + p1 * normal_direction[2] - f4 = h_v_lower_normal - f5 = h_v_lower_normal * v1_lower + p2 * normal_direction[1] - f6 = h_v_lower_normal * v2_lower + p2 * normal_direction[2] + f1 = h_v_upper_normal + f2 = h_v_upper_normal * v1_upper + p1 * normal_direction[1] + f3 = h_v_upper_normal * v2_upper + p1 * normal_direction[2] + f4 = h_v_lower_normal + f5 = h_v_lower_normal * v1_lower + p2 * normal_direction[1] + f6 = h_v_lower_normal * v2_lower + p2 * normal_direction[2] - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u))) + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u))) end - """ flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) @@ -290,58 +334,61 @@ Further details are available in the paper: shallow water equations on unstructured curvilinear meshes with discontinuous bathymetry [DOI: 10.1016/j.jcp.2017.03.036](https://doi.org/10.1016/j.jcp.2017.03.036) """ -@inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, +@inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - b_rr = u_rr[7] - - z = zero(eltype(u_ll)) - - # Bottom gradient nonconservative term: (0, g*h_upper*(b + h_lower)_x, g*h_upper*(b + h_lower)_y , - # 0, g*h_lower*(b + r*h_upper)_x, - # g*h_lower*(b + r*h_upper)_y, 0) - if orientation == 1 - f = SVector(z, - equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - z,z, - equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), - z,z) - else # orientation == 2 - f = SVector(z, z, - equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - z,z, - equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), - z) - end - - return f + # Pull the necessary left and right state information + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + b_rr = u_rr[7] + + z = zero(eltype(u_ll)) + + # Bottom gradient nonconservative term: (0, g*h_upper*(b + h_lower)_x, g*h_upper*(b + h_lower)_y , + # 0, g*h_lower*(b + r*h_upper)_x, + # g*h_lower*(b + r*h_upper)_y, 0) + if orientation == 1 + f = SVector(z, + equations.gravity * h_upper_ll * (b_rr + h_lower_rr), + z, z, + equations.gravity * h_lower_ll * (b_rr + equations.r * 
h_upper_rr), + z, z) + else # orientation == 2 + f = SVector(z, z, + equations.gravity * h_upper_ll * (b_rr + h_lower_rr), + z, z, + equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), + z) + end + + return f end @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - b_rr = u_rr[7] - - # Note this routine only uses the `normal_direction_average` and the average of the - # bottom topography to get a quadratic split form DG gradient on curved elements - return SVector(zero(eltype(u_ll)), - normal_direction_average[1] * equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - normal_direction_average[2] * equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - zero(eltype(u_ll)), - normal_direction_average[1] * equations.gravity * h_lower_ll * (b_rr + + # Pull the necessary left and right state information + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + b_rr = u_rr[7] + + # Note this routine only uses the `normal_direction_average` and the average of the + # bottom topography to get a quadratic split form DG gradient on curved elements + return SVector(zero(eltype(u_ll)), + normal_direction_average[1] * equations.gravity * h_upper_ll * + (b_rr + h_lower_rr), + normal_direction_average[2] * equations.gravity * h_upper_ll * + (b_rr + h_lower_rr), + zero(eltype(u_ll)), + normal_direction_average[1] * equations.gravity * h_lower_ll * + (b_rr + equations.r * h_upper_rr), - normal_direction_average[2] * equations.gravity * h_lower_ll * (b_rr + + normal_direction_average[2] * equations.gravity * h_lower_ll * + (b_rr + equations.r * h_upper_rr), - zero(eltype(u_ll))) - end - + zero(eltype(u_ll))) +end """ flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, @@ -362,90 +409,99 @@ It should be noted that the equations are ordered differently and the designation of the upper and lower layer has been changed which leads to a slightly different formulation. 
""" -@inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, +@inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr - - # Create average and jump values - h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) - h_upper_jump = h_upper_rr - h_upper_ll - h_lower_jump = h_lower_rr - h_lower_ll - b_jump = b_rr - b_ll - - # Assign variables for constants for better readability - g = equations.gravity - - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, g*h_upper*(b+h_lower)_y, 0, - # g*h_lower*(b+r*h_upper)_x, g*h_lower*(b+r*h_upper)_x, 0) - - # Includes two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry - z = zero(eltype(u_ll)) - if orientation == 1 - f = SVector( - z, - g * h_upper_ll * (b_ll + h_lower_ll) + g * h_upper_average * (b_jump + h_lower_jump), - z,z, - g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + g * h_lower_average * (b_jump + - equations.r * h_upper_jump), - z,z) - else # orientation == 2 - f = SVector( - z,z, - g * h_upper_ll * (b_ll + h_lower_ll) + g * h_upper_average * (b_jump + h_lower_jump), - z,z, - g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + g * h_lower_average * (b_jump + - equations.r * h_upper_jump), - z) - end - - return f + # Pull the necessary left and right state information + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr + + # Create average and jump values + h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) + h_upper_jump = h_upper_rr - h_upper_ll + h_lower_jump = h_lower_rr - h_lower_ll + b_jump = b_rr - b_ll + + # Assign variables for constants for better readability + g = equations.gravity + + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, g*h_upper*(b+h_lower)_y, 0, + # g*h_lower*(b+r*h_upper)_x, g*h_lower*(b+r*h_upper)_x, 0) + + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry + z = zero(eltype(u_ll)) + if orientation == 1 + f = SVector(z, + g * h_upper_ll * (b_ll + h_lower_ll) + + g * h_upper_average * (b_jump + h_lower_jump), + z, z, + g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + + g * h_lower_average * (b_jump + + equations.r * h_upper_jump), + z, z) + else # orientation == 2 + f = SVector(z, z, + g * h_upper_ll * (b_ll + h_lower_ll) + + g * h_upper_average * (b_jump + h_lower_jump), + z, z, + g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + + g * h_lower_average * (b_jump + + equations.r * h_upper_jump), + z) + end + + return f end @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, 
equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr - - # Create average and jump values - h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) - h_upper_jump = h_upper_rr - h_upper_ll - h_lower_jump = h_lower_rr - h_lower_ll - b_jump = b_rr - b_ll - - # Comes in two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` - # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography - f2 = normal_direction_average[1] * equations.gravity*h_upper_ll*(b_ll + h_lower_ll) - f3 = normal_direction_average[2] * equations.gravity*h_upper_ll*(b_ll + h_lower_ll) - f5 = normal_direction_average[1] * equations.gravity*h_lower_ll*(b_ll + equations.r * h_upper_ll) - f6 = normal_direction_average[2] * equations.gravity*h_lower_ll*(b_ll + equations.r * h_upper_ll) - # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` - # to handle discontinuous bathymetry - f2 += normal_direction_ll[1] * equations.gravity*h_upper_average*(b_jump + h_lower_jump) - f3 += normal_direction_ll[2] * equations.gravity*h_upper_average*(b_jump + h_lower_jump) - f5 += normal_direction_ll[1] * equations.gravity*h_lower_average*(b_jump + - equations.r * h_upper_jump) - f6 += normal_direction_ll[2] * equations.gravity*h_lower_average*(b_jump + - equations.r * h_upper_jump) - - # Continuity equations do not have a nonconservative flux - f1 = f4 = zero(eltype(u_ll)) - -return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Pull the necessary left and right state information + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr + + # Create average and jump values + h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) + h_upper_jump = h_upper_rr - h_upper_ll + h_lower_jump = h_lower_rr - h_lower_ll + b_jump = b_rr - b_ll + + # Comes in two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` + # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography + f2 = normal_direction_average[1] * equations.gravity * h_upper_ll * + (b_ll + h_lower_ll) + f3 = normal_direction_average[2] * equations.gravity * h_upper_ll * + (b_ll + h_lower_ll) + f5 = normal_direction_average[1] * equations.gravity * h_lower_ll * + (b_ll + equations.r * h_upper_ll) + f6 = normal_direction_average[2] * equations.gravity * h_lower_ll * + (b_ll + equations.r * h_upper_ll) + # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` + # to handle discontinuous bathymetry + f2 += normal_direction_ll[1] * equations.gravity * h_upper_average * + (b_jump + h_lower_jump) + f3 += normal_direction_ll[2] * equations.gravity * h_upper_average * + (b_jump + h_lower_jump) + f5 += normal_direction_ll[1] * equations.gravity * h_lower_average * + (b_jump + + equations.r * h_upper_jump) + f6 += normal_direction_ll[2] * equations.gravity * h_lower_average * + (b_jump + + equations.r * h_upper_jump) + + # Continuity equations do not have a nonconservative flux + f1 = f4 = zero(eltype(u_ll)) + + return 
SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end - """ flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations2D) @@ -467,83 +523,86 @@ designation of the upper and lower layer has been changed which leads to a sligh formulation. """ @inline function flux_fjordholm_etal(u_ll, u_rr, - orientation::Integer, + orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr ) - h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr ) - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) - p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = h_upper_avg * v1_upper_avg - f2 = f1 * v1_upper_avg + p1_avg - f3 = f1 * v2_upper_avg - f4 = h_lower_avg * v1_lower_avg - f5 = f4 * v1_lower_avg + p2_avg - f6 = f4 * v2_lower_avg - else - f1 = h_upper_avg * v2_upper_avg - f2 = f1 * v1_upper_avg - f3 = f1 * v2_upper_avg + p1_avg - f4 = h_lower_avg * v2_lower_avg - f5 = f4 * v1_lower_avg - f6 = f4 * v2_lower_avg + p2_avg - end - - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) + v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr) + v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr) + v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr) + v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr) + p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) + p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = h_upper_avg * v1_upper_avg + f2 = f1 * v1_upper_avg + p1_avg + f3 = f1 * v2_upper_avg + f4 = h_lower_avg * v1_lower_avg + f5 = f4 * v1_lower_avg + p2_avg + f6 = f4 * v2_lower_avg + else + f1 = h_upper_avg * v2_upper_avg + f2 = f1 * v1_upper_avg + f3 = f1 * v2_upper_avg + p1_avg + f4 = h_lower_avg * v2_lower_avg + f5 = f4 * v1_lower_avg + f6 = f4 * v2_lower_avg + p2_avg + end + + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end @inline function flux_fjordholm_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Compute velocity in normal direction - v_upper_dot_n_ll = v1_upper_ll * 
normal_direction[1] + v2_upper_ll * normal_direction[2] - v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + v2_upper_rr * normal_direction[2] - v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + v2_lower_ll * normal_direction[2] - v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + v2_lower_rr * normal_direction[2] - - # Average each factor of products in flux - h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr ) - h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr ) - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.25* equations.gravity * (h_upper_ll^2 + h_upper_rr^2) - p2_avg = 0.25* equations.gravity * (h_lower_ll^2 + h_lower_rr^2) - v_upper_dot_n_avg = 0.5 * (v_upper_dot_n_ll + v_upper_dot_n_rr) - v_lower_dot_n_avg = 0.5 * (v_lower_dot_n_ll + v_lower_dot_n_rr) - - # Calculate fluxes depending on normal_direction - f1 = h_upper_avg * v_upper_dot_n_avg - f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1] - f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2] - f4 = h_lower_avg * v_lower_dot_n_avg - f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1] - f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2] - - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + # Compute velocity in normal direction + v_upper_dot_n_ll = v1_upper_ll * normal_direction[1] + + v2_upper_ll * normal_direction[2] + v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + + v2_upper_rr * normal_direction[2] + v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + + v2_lower_ll * normal_direction[2] + v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + + v2_lower_rr * normal_direction[2] + + # Average each factor of products in flux + h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) + v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr) + v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr) + v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr) + v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr) + p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) + p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) + v_upper_dot_n_avg = 0.5 * (v_upper_dot_n_ll + v_upper_dot_n_rr) + v_lower_dot_n_avg = 0.5 * (v_lower_dot_n_ll + v_lower_dot_n_rr) + + # Calculate fluxes depending on normal_direction + f1 = h_upper_avg * v_upper_dot_n_avg + f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1] + f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2] + f4 = h_lower_avg * v_lower_dot_n_avg + f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1] + f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2] + + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end - """ flux_wintermeyer_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations2D) @@ -563,77 +622,76 @@ Further details are available in Theorem 1 of the paper: @inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll - 
h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr - - # Get the velocities on either side - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr - p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) - f2 = f1 * v1_upper_avg + p1_avg - f3 = f1 * v2_upper_avg - f4 = 0.5 * (h_v1_lower_ll + h_v1_lower_rr) - f5 = f4 * v1_lower_avg + p2_avg - f6 = f4 * v2_lower_avg - else - f1 = 0.5 * (h_v2_upper_ll + h_v2_upper_rr) - f2 = f1 * v1_upper_avg - f3 = f1 * v2_upper_avg + p1_avg - f4 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) - f5 = f4 * v1_lower_avg - f6 = f4 * v2_lower_avg + p2_avg - end - - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr + + # Get the velocities on either side + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr) + v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr) + v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr) + v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr) + p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr + p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) + f2 = f1 * v1_upper_avg + p1_avg + f3 = f1 * v2_upper_avg + f4 = 0.5 * (h_v1_lower_ll + h_v1_lower_rr) + f5 = f4 * v1_lower_avg + p2_avg + f6 = f4 * v2_lower_avg + else + f1 = 0.5 * (h_v2_upper_ll + h_v2_upper_rr) + f2 = f1 * v1_upper_avg + f3 = f1 * v2_upper_avg + p1_avg + f4 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) + f5 = f4 * v1_lower_avg + f6 = f4 * v2_lower_avg + p2_avg + end + + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end @inline function flux_wintermeyer_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr - - # Get the velocities on either side - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr - p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr - h_v1_upper_avg = 0.5 * 
(h_v1_upper_ll + h_v1_upper_rr )
-  h_v2_upper_avg = 0.5 * (h_v2_upper_ll + h_v2_upper_rr )
-  h_v1_lower_avg = 0.5 * (h_v1_lower_ll + h_v1_lower_rr )
-  h_v2_lower_avg = 0.5 * (h_v2_lower_ll + h_v2_lower_rr )
-
-  # Calculate fluxes depending on normal_direction
-  f1 = h_v1_upper_avg * normal_direction[1] + h_v2_upper_avg * normal_direction[2]
-  f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1]
-  f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2]
-  f4 = h_v1_lower_avg * normal_direction[1] + h_v2_lower_avg * normal_direction[2]
-  f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1]
-  f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2]
-
-  return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll)))
+    # Unpack left and right state
+    h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll
+    h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr
+
+    # Get the velocities on either side
+    v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations)
+    v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations)
+
+    # Average each factor of products in flux
+    v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr)
+    v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr)
+    v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr)
+    v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr)
+    p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr
+    p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr
+    h_v1_upper_avg = 0.5 * (h_v1_upper_ll + h_v1_upper_rr)
+    h_v2_upper_avg = 0.5 * (h_v2_upper_ll + h_v2_upper_rr)
+    h_v1_lower_avg = 0.5 * (h_v1_lower_ll + h_v1_lower_rr)
+    h_v2_lower_avg = 0.5 * (h_v2_lower_ll + h_v2_lower_rr)
+
+    # Calculate fluxes depending on normal_direction
+    f1 = h_v1_upper_avg * normal_direction[1] + h_v2_upper_avg * normal_direction[2]
+    f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1]
+    f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2]
+    f4 = h_v1_lower_avg * normal_direction[1] + h_v2_lower_avg * normal_direction[2]
+    f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1]
+    f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2]
+
+    return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll)))
 end
-
 """
     flux_es_fjordholm_etal(u_ll, u_rr, orientation_or_normal_direction,
                            equations::ShallowWaterTwoLayerEquations2D)
@@ -650,85 +708,87 @@ designation of the upper and lower layer has been changed which leads to a slightly different formulation.
""" @inline function flux_es_fjordholm_etal(u_ll, u_rr, - orientation_or_normal_direction, - equations::ShallowWaterTwoLayerEquations2D) - # Compute entropy conservative flux but without the bottom topography - f_ec = flux_fjordholm_etal(u_ll, u_rr, - orientation_or_normal_direction, - equations) - - # Get maximum signal velocity - λ = max_abs_speed_naive(u_ll, u_rr, orientation_or_normal_direction, equations) - - # Get entropy variables but without the bottom topography - q_rr = cons2entropy(u_rr,equations) - q_ll = cons2entropy(u_ll,equations) - - # Average values from left and right - u_avg = (u_ll + u_rr)/2 - - # Introduce variables for better readability - rho_upper = equations.rho_upper - rho_lower = equations.rho_lower - g = equations.gravity - drho = rho_upper - rho_lower - - # Entropy Jacobian matrix - H = @SMatrix [ - [-rho_lower/(g*rho_upper*drho);; - -rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - -rho_lower*u_avg[3]/(g*rho_upper*u_avg[1]*drho);; - 1.0/(g*drho);; - u_avg[5]/(g*u_avg[4]*drho);; - u_avg[6]/(g*u_avg[4]*drho);; - 0]; - [-rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - (g*rho_upper*u_avg[1]^3 - g*rho_lower*u_avg[1]^3 + - -rho_lower*u_avg[2]^2)/(g*rho_upper*u_avg[1]^2*drho);; - -rho_lower*u_avg[2]*u_avg[3]/(g*rho_upper*u_avg[1]^2*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - u_avg[2]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[2]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - 0]; - [-rho_lower*u_avg[3]/(g*rho_upper*u_avg[1]*drho);; - -rho_lower*u_avg[2]*u_avg[3]/(g*rho_upper*u_avg[1]^2*drho);; - (g*rho_upper*u_avg[1]^3 - g*rho_lower*u_avg[1]^3 + - -rho_lower*u_avg[3]^2)/(g*rho_upper*u_avg[1]^2*drho);; - u_avg[3]/(g*u_avg[1]*drho);; - u_avg[3]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[3]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - 0]; - [1.0/(g*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - u_avg[3]/(g*u_avg[1]*drho);; - -1.0/(g*drho);; - -u_avg[5]/(g*u_avg[4]*drho);; - -u_avg[6]/(g*u_avg[4]*drho);; - 0]; - [u_avg[5]/(g*u_avg[4]*drho);; - u_avg[2]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[3]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - -u_avg[5]/(g*u_avg[4]*drho);; - (g*rho_upper*u_avg[4]^3 - g*rho_lower*u_avg[4]^3 + - -rho_lower*u_avg[5]^2)/(g*rho_lower*u_avg[4]^2*drho);; - -u_avg[5]*u_avg[6]/(g*u_avg[4]^2*drho);; - 0]; - [u_avg[6]/(g*u_avg[4]*drho);; - u_avg[2]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[3]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - -u_avg[6]/(g*u_avg[4]*drho);; - -u_avg[5]*u_avg[6]/(g*u_avg[4]^2*drho);; - (g*rho_upper*u_avg[4]^3 - g*rho_lower*u_avg[4]^3 + - -rho_lower*u_avg[6]^2)/(g*rho_lower*u_avg[4]^2*drho);;0]; - [0;;0;;0;;0;;0;;0;;0]] - - # Add dissipation to entropy conservative flux to obtain entropy stable flux - f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) - - return SVector(f_es[1], f_es[2], f_es[3], f_es[4], f_es[5], f_es[6], zero(eltype(u_ll))) + orientation_or_normal_direction, + equations::ShallowWaterTwoLayerEquations2D) + # Compute entropy conservative flux but without the bottom topography + f_ec = flux_fjordholm_etal(u_ll, u_rr, + orientation_or_normal_direction, + equations) + + # Get maximum signal velocity + λ = max_abs_speed_naive(u_ll, u_rr, orientation_or_normal_direction, equations) + + # Get entropy variables but without the bottom topography + q_rr = cons2entropy(u_rr, equations) + q_ll = cons2entropy(u_ll, equations) + + # Average values from left and right + u_avg = (u_ll + u_rr) / 2 + + # Introduce variables for better readability + rho_upper = equations.rho_upper + rho_lower = equations.rho_lower + g = equations.gravity + drho = 
rho_upper - rho_lower + + # Compute entropy Jacobian coefficients + h11 = -rho_lower / (g * rho_upper * drho) + h12 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h13 = -rho_lower * u_avg[3] / (g * rho_upper * u_avg[1] * drho) + h14 = 1.0 / (g * drho) + h15 = u_avg[5] / (g * u_avg[4] * drho) + h16 = u_avg[6] / (g * u_avg[4] * drho) + h21 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h22 = ((g * rho_upper * u_avg[1]^3 - g * rho_lower * u_avg[1]^3 + + -rho_lower * u_avg[2]^2) / (g * rho_upper * u_avg[1]^2 * drho)) + h23 = -rho_lower * u_avg[2] * u_avg[3] / (g * rho_upper * u_avg[1]^2 * drho) + h24 = u_avg[2] / (g * u_avg[1] * drho) + h25 = u_avg[2] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h26 = u_avg[2] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h31 = -rho_lower * u_avg[3] / (g * rho_upper * u_avg[1] * drho) + h32 = -rho_lower * u_avg[2] * u_avg[3] / (g * rho_upper * u_avg[1]^2 * drho) + h33 = ((g * rho_upper * u_avg[1]^3 - g * rho_lower * u_avg[1]^3 + + -rho_lower * u_avg[3]^2) / (g * rho_upper * u_avg[1]^2 * drho)) + h34 = u_avg[3] / (g * u_avg[1] * drho) + h35 = u_avg[3] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h36 = u_avg[3] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h41 = 1.0 / (g * drho) + h42 = u_avg[2] / (g * u_avg[1] * drho) + h43 = u_avg[3] / (g * u_avg[1] * drho) + h44 = -1.0 / (g * drho) + h45 = -u_avg[5] / (g * u_avg[4] * drho) + h46 = -u_avg[6] / (g * u_avg[4] * drho) + h51 = u_avg[5] / (g * u_avg[4] * drho) + h52 = u_avg[2] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h53 = u_avg[3] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h54 = -u_avg[5] / (g * u_avg[4] * drho) + h55 = ((g * rho_upper * u_avg[4]^3 - g * rho_lower * u_avg[4]^3 + + -rho_lower * u_avg[5]^2) / (g * rho_lower * u_avg[4]^2 * drho)) + h56 = -u_avg[5] * u_avg[6] / (g * u_avg[4]^2 * drho) + h61 = u_avg[6] / (g * u_avg[4] * drho) + h62 = u_avg[2] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h63 = u_avg[3] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h64 = -u_avg[6] / (g * u_avg[4] * drho) + h65 = -u_avg[5] * u_avg[6] / (g * u_avg[4]^2 * drho) + h66 = ((g * rho_upper * u_avg[4]^3 - g * rho_lower * u_avg[4]^3 + + -rho_lower * u_avg[6]^2) / (g * rho_lower * u_avg[4]^2 * drho)) + + # Entropy Jacobian matrix + H = @SMatrix [[h11;; h12;; h13;; h14;; h15;; h16;; 0]; + [h21;; h22;; h23;; h24;; h25;; h26;; 0]; + [h31;; h32;; h33;; h34;; h35;; h36;; 0]; + [h41;; h42;; h43;; h44;; h45;; h46;; 0]; + [h51;; h52;; h53;; h54;; h55;; h56;; 0]; + [h61;; h62;; h63;; h64;; h65;; h66;; 0]; + [0;; 0;; 0;; 0;; 0;; 0;; 0]] + + # Add dissipation to entropy conservative flux to obtain entropy stable flux + f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) + + return SVector(f_es[1], f_es[2], f_es[3], f_es[4], f_es[5], f_es[6], + zero(eltype(u_ll))) end - # Calculate approximation for maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound. This function uses approximate # eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them @@ -738,199 +798,198 @@ end # - Jonas Nycander, Andrew McC. Hogg, Leela M. 
Frankcombe (2008) # Open boundary conditions for nonlinear channel Flows # [DOI: 10.1016/j.ocemod.2008.06.003](https://doi.org/10.1016/j.ocemod.2008.06.003) -@inline function max_abs_speed_naive(u_ll, u_rr, - orientation::Integer, +@inline function max_abs_speed_naive(u_ll, u_rr, + orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr - - # Calculate averaged velocity of both layers - if orientation == 1 - v_m_ll = (h_v1_upper_ll + h_v1_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (h_v1_upper_rr + h_v1_lower_rr) / (h_upper_rr + h_lower_rr) - else - v_m_ll = (h_v2_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (h_v2_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) - end - - # Calculate the wave celerity on the left and right - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - - c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll) ) - c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) - - return (max(abs(v_m_ll),abs(v_m_rr)) + max(c_ll,c_rr)) + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr + + # Calculate averaged velocity of both layers + if orientation == 1 + v_m_ll = (h_v1_upper_ll + h_v1_lower_ll) / (h_upper_ll + h_lower_ll) + v_m_rr = (h_v1_upper_rr + h_v1_lower_rr) / (h_upper_rr + h_lower_rr) + else + v_m_ll = (h_v2_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) + v_m_rr = (h_v2_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) + end + + # Calculate the wave celerity on the left and right + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + + c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) + c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) + + return (max(abs(v_m_ll), abs(v_m_rr)) + max(c_ll, c_rr)) end - -@inline function max_abs_speed_naive(u_ll, u_rr, +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, _, _, h_lower_ll, _, _, _ = u_ll - h_upper_rr, _, _, h_lower_rr, _, _, _ = u_rr - - # Extract and compute the velocities in the normal direction - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - v_upper_dot_n_ll = v1_upper_ll * normal_direction[1] + v2_upper_ll * normal_direction[2] - v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + v2_upper_rr * normal_direction[2] - v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + v2_lower_ll * normal_direction[2] - v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + v2_lower_rr * normal_direction[2] - - # Calculate averaged velocity of both layers - v_m_ll = (v_upper_dot_n_ll * h_upper_ll + v_lower_dot_n_ll * h_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (v_upper_dot_n_rr * h_upper_rr + v_lower_dot_n_rr * h_lower_rr) / (h_upper_rr + h_lower_rr) - - # Compute the wave celerity on the left and right - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, 
equations) - - c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) - c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) - - # The normal velocities are already scaled by the norm - return max(abs(v_m_ll), abs(v_m_rr)) + max(c_ll, c_rr) * norm(normal_direction) + # Unpack left and right state + h_upper_ll, _, _, h_lower_ll, _, _, _ = u_ll + h_upper_rr, _, _, h_lower_rr, _, _, _ = u_rr + + # Extract and compute the velocities in the normal direction + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + v_upper_dot_n_ll = v1_upper_ll * normal_direction[1] + + v2_upper_ll * normal_direction[2] + v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + + v2_upper_rr * normal_direction[2] + v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + + v2_lower_ll * normal_direction[2] + v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + + v2_lower_rr * normal_direction[2] + + # Calculate averaged velocity of both layers + v_m_ll = (v_upper_dot_n_ll * h_upper_ll + v_lower_dot_n_ll * h_lower_ll) / + (h_upper_ll + h_lower_ll) + v_m_rr = (v_upper_dot_n_rr * h_upper_rr + v_lower_dot_n_rr * h_lower_rr) / + (h_upper_rr + h_lower_rr) + + # Compute the wave celerity on the left and right + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + + c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) + c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) + + # The normal velocities are already scaled by the norm + return max(abs(v_m_ll), abs(v_m_rr)) + max(c_ll, c_rr) * norm(normal_direction) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom topography -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, - orientation_or_normal_direction, equations::ShallowWaterTwoLayerEquations2D) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - return SVector(diss[1], diss[2], diss[3], diss[4], diss[5], diss[6], zero(eltype(u_ll))) +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, + equations::ShallowWaterTwoLayerEquations2D) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], diss[3], diss[4], diss[5], diss[6], + zero(eltype(u_ll))) end - # Absolute speed of the barotropic mode @inline function max_abs_speeds(u, equations::ShallowWaterTwoLayerEquations2D) - h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u + h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u - # Calculate averaged velocity of both layers - v1_m = (h_v1_upper + h_v1_lower) / (h_upper + h_lower) - v2_m = (h_v2_upper + h_v2_lower) / (h_upper + h_lower) + # Calculate averaged velocity of both layers + v1_m = (h_v1_upper + h_v1_lower) / (h_upper + h_lower) + v2_m = (h_v2_upper + h_v2_lower) / (h_upper + h_lower) - h_upper, h_lower = waterheight(u, equations) - v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) + h_upper, h_lower = waterheight(u, equations) + v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) - c = sqrt(equations.gravity * (h_upper + h_lower)) - return (max(abs(v1_m) + c, abs(v1_upper), abs(v1_lower)), - max(abs(v2_m) + c, abs(v2_upper), abs(v2_lower))) + c = sqrt(equations.gravity 
* (h_upper + h_lower))
+    return (max(abs(v1_m) + c, abs(v1_upper), abs(v1_lower)),
+            max(abs(v2_m) + c, abs(v2_upper), abs(v2_lower)))
 end
-
 # Helper function to extract the velocity vector from the conservative variables
 @inline function velocity(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u
+    h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u
 
-  v1_upper = h_v1_upper / h_upper
-  v2_upper = h_v2_upper / h_upper
-  v1_lower = h_v1_lower / h_lower
-  v2_lower = h_v2_lower / h_lower
+    v1_upper = h_v1_upper / h_upper
+    v2_upper = h_v2_upper / h_upper
+    v1_lower = h_v1_lower / h_lower
+    v2_lower = h_v2_lower / h_lower
 
-  return SVector(v1_upper, v2_upper, v1_lower, v2_lower)
+    return SVector(v1_upper, v2_upper, v1_lower, v2_lower)
 end
-
 # Convert conservative variables to primitive
 @inline function cons2prim(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, _, _, h_lower, _, _, b = u
+    h_upper, _, _, h_lower, _, _, b = u
 
-  H_lower = h_lower + b
-  H_upper = h_lower + h_upper + b
-  v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)
+    H_lower = h_lower + b
+    H_upper = h_lower + h_upper + b
+    v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)
 
-  return SVector(H_upper, v1_upper, v2_upper , H_lower, v1_lower, v2_lower, b)
+    return SVector(H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b)
 end
-
 # Convert conservative variables to entropy variables
 # Note, only the first six are the entropy variables; the seventh entry still just carries the
 # bottom topography values for convenience.
 # In contrast to general usage, the entropy variables here are denoted with w although w is also
 # used for the velocity in y-direction.
 @inline function cons2entropy(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, _, _, h_lower, _, _, b = u
-  # Assign new variables for better readability
-  rho_upper = equations.rho_upper
-  rho_lower = equations.rho_lower
-  v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)
-
-  w1 = rho_upper * (equations.gravity * ( h_upper + h_lower + b) +
-        - 0.5 * (v1_upper^2 + v2_upper^2))
-  w2 = rho_upper * v1_upper
-  w3 = rho_upper * v2_upper
-  w4 = rho_lower * (equations.gravity * (equations.r * h_upper + h_lower + b) +
-        - 0.5 * (v1_lower^2 + v2_lower^2))
-  w5 = rho_lower * v1_lower
-  w6 = rho_lower * v2_lower
-  return SVector(w1, w2, w3, w4, w5, w6, b)
+    h_upper, _, _, h_lower, _, _, b = u
+    # Assign new variables for better readability
+    rho_upper = equations.rho_upper
+    rho_lower = equations.rho_lower
+    v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)
+
+    w1 = rho_upper * (equations.gravity * (h_upper + h_lower + b) +
+          -0.5 * (v1_upper^2 + v2_upper^2))
+    w2 = rho_upper * v1_upper
+    w3 = rho_upper * v2_upper
+    w4 = rho_lower * (equations.gravity * (equations.r * h_upper + h_lower + b) +
+          -0.5 * (v1_lower^2 + v2_lower^2))
+    w5 = rho_lower * v1_lower
+    w6 = rho_lower * v2_lower
+    return SVector(w1, w2, w3, w4, w5, w6, b)
 end
-
 # Convert primitive to conservative variables
 @inline function prim2cons(prim, equations::ShallowWaterTwoLayerEquations2D)
-  H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b = prim
-
-  h_lower = H_lower - b
-  h_upper = H_upper - h_lower - b
-  h_v1_upper = h_upper * v1_upper
-  h_v2_upper = h_upper * v2_upper
-  h_v1_lower = h_lower * v1_lower
-  h_v2_lower = h_lower * v2_lower
-  return SVector(h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, b)
+    H_upper, v1_upper,
v2_upper, H_lower, v1_lower, v2_lower, b = prim
+
+    h_lower = H_lower - b
+    h_upper = H_upper - h_lower - b
+    h_v1_upper = h_upper * v1_upper
+    h_v2_upper = h_upper * v2_upper
+    h_v1_lower = h_lower * v1_lower
+    h_v2_lower = h_lower * v2_lower
+    return SVector(h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, b)
 end
-
 @inline function waterheight(u, equations::ShallowWaterTwoLayerEquations2D)
-  return SVector(u[1], u[4])
+    return SVector(u[1], u[4])
 end
-
 # Entropy function for the shallow water equations is the total energy
-@inline entropy(cons, equations::ShallowWaterTwoLayerEquations2D) = energy_total(cons, equations)
-
+@inline function entropy(cons, equations::ShallowWaterTwoLayerEquations2D)
+    energy_total(cons, equations)
+end
 # Calculate total energy for a conservative state `cons`
 @inline function energy_total(cons, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, h_v1_upper, h_v2_upper, h_lower, h_v2_lower, h_v2_lower, b = cons
-  g = equations.gravity
-  rho_upper= equations.rho_upper
-  rho_lower= equations.rho_lower
-
-  e = (0.5 * rho_upper * (h_v1_upper^2 / h_upper + h_v2_upper^2 / h_upper + g * h_upper^2) +
-       0.5 * rho_lower * (h_v2_lower^2 / h_lower + h_v2_lower^2 / h_lower + g * h_lower^2) +
-       g*rho_lower*h_lower*b + g*rho_upper*h_upper*(h_lower + b))
-  return e
+    h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, b = cons
+    g = equations.gravity
+    rho_upper = equations.rho_upper
+    rho_lower = equations.rho_lower
+
+    e = (0.5 * rho_upper *
+         (h_v1_upper^2 / h_upper + h_v2_upper^2 / h_upper + g * h_upper^2) +
+         0.5 * rho_lower *
+         (h_v1_lower^2 / h_lower + h_v2_lower^2 / h_lower + g * h_lower^2) +
+         g * rho_lower * h_lower * b + g * rho_upper * h_upper * (h_lower + b))
+    return e
 end
-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, h_v1_upper, h_v2_upper, h_lower, h_v2_lower, h_v2_lower, _ = u
+    h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u
 
-  return (0.5 * equations.rho_upper * h_v1_upper^2 / h_upper +
-          0.5 * equations.rho_upper * h_v2_upper^2 / h_upper +
-          0.5 * equations.rho_lower * h_v2_lower^2 / h_lower +
-          0.5 * equations.rho_lower * h_v2_lower^2 / h_lower)
+    return (0.5 * equations.rho_upper * h_v1_upper^2 / h_upper +
+            0.5 * equations.rho_upper * h_v2_upper^2 / h_upper +
+            0.5 * equations.rho_lower * h_v1_lower^2 / h_lower +
+            0.5 * equations.rho_lower * h_v2_lower^2 / h_lower)
 end
-
 # Calculate potential energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::ShallowWaterTwoLayerEquations2D)
-  return energy_total(cons, equations) - energy_kinetic(cons, equations)
+    return energy_total(cons, equations) - energy_kinetic(cons, equations)
 end
-
 # Calculate the error for the "lake-at-rest" test case where H = h_upper+h_lower+b should
 # be a constant value over time
 @inline function lake_at_rest_error(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, _, _, h_lower, _, _, b = u
-  return abs(equations.H0 - (h_upper + h_lower + b))
+    h_upper, _, _, h_lower, _, _, b = u
+    return abs(equations.H0 - (h_upper + h_lower + b))
 end
-
 end # @muladd
diff --git a/src/meshes/abstract_tree.jl b/src/meshes/abstract_tree.jl
index 7d075d66c82..469189ff50c 100644
--- a/src/meshes/abstract_tree.jl
+++ b/src/meshes/abstract_tree.jl
@@ -3,20 +3,21 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#!
format: noindent abstract type AbstractTree{NDIMS} <: AbstractContainer end # Type traits to obtain dimension -@inline Base.ndims(::AbstractTree{NDIMS}) where NDIMS = NDIMS - +@inline Base.ndims(::AbstractTree{NDIMS}) where {NDIMS} = NDIMS # Auxiliary methods to allow semantic queries on the tree # Check whether cell has parent cell has_parent(t::AbstractTree, cell_id::Int) = t.parent_ids[cell_id] > 0 # Count number of children for a given cell -n_children(t::AbstractTree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) +function n_children(t::AbstractTree, cell_id::Int) + count(x -> (x > 0), @view t.child_ids[:, cell_id]) +end # Check whether cell has any child cell has_children(t::AbstractTree, cell_id::Int) = n_children(t, cell_id) > 0 @@ -28,16 +29,19 @@ is_leaf(t::AbstractTree, cell_id::Int) = !has_children(t, cell_id) has_child(t::AbstractTree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 # Check if cell has a neighbor at the same refinement level in the given direction -has_neighbor(t::AbstractTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 +function has_neighbor(t::AbstractTree, cell_id::Int, direction::Int) + t.neighbor_ids[direction, cell_id] > 0 +end # Check if cell has a coarse neighbor, i.e., with one refinement level lower function has_coarse_neighbor(t::AbstractTree, cell_id::Int, direction::Int) - return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) + return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) end # Check if cell has any neighbor (same-level or lower-level) function has_any_neighbor(t::AbstractTree, cell_id::Int, direction::Int) - return has_neighbor(t, cell_id, direction) || has_coarse_neighbor(t, cell_id, direction) + return has_neighbor(t, cell_id, direction) || + has_coarse_neighbor(t, cell_id, direction) end # Check if cell is own cell, i.e., belongs to this MPI rank @@ -59,10 +63,9 @@ maximum_level(t::AbstractTree) = maximum(t.levels[leaf_cells(t)]) isperiodic(t::AbstractTree) = all(t.periodicity) isperiodic(t::AbstractTree, dimension) = t.periodicity[dimension] - # Auxiliary methods for often-required calculations # Number of potential child cells -n_children_per_cell(::AbstractTree{NDIMS}) where NDIMS = 2^NDIMS +n_children_per_cell(::AbstractTree{NDIMS}) where {NDIMS} = 2^NDIMS # Number of directions # @@ -73,7 +76,7 @@ n_children_per_cell(::AbstractTree{NDIMS}) where NDIMS = 2^NDIMS # 4 -> +y # 5 -> -z # 6 -> +z -@inline n_directions(::AbstractTree{NDIMS}) where NDIMS = 2 * NDIMS +@inline n_directions(::AbstractTree{NDIMS}) where {NDIMS} = 2 * NDIMS # TODO: Taal performance, 1:n_directions(tree) vs. Base.OneTo(n_directions(tree)) vs. SOneTo(n_directions(tree)) """ eachdirection(tree::AbstractTree) @@ -121,7 +124,6 @@ const _child_signs = [-1 -1 -1; +1 +1 +1] child_sign(child::Int, dim::Int) = _child_signs[child, dim] - # For each child position (1 to 8) and a given direction (from 1 to 6), return # neighboring child position. 
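
To make this encoding concrete, here is a minimal, self-contained sketch, assuming the z-order child numbering implied by `_child_signs` (writing `child - 1` in binary with the x-bit lowest, the adjacent child is obtained by flipping the bit of the axis being crossed). The name `adjacent_child_demo` is invented for illustration and is not Trixi API; the asserts reproduce exactly the two table rows visible in this hunk. The lookup table itself follows.

    # Hypothetical helper (illustration only): flip the bit of the axis crossed by
    # `direction` (1/2 -> x, 3/4 -> y, 5/6 -> z) to get the adjacent child position.
    adjacent_child_demo(child, direction) = xor(child - 1, 1 << (div(direction + 1, 2) - 1)) + 1

    # Reproduces the first and last rows of the table below:
    @assert [adjacent_child_demo(1, d) for d in 1:6] == [2, 2, 3, 3, 5, 5]
    @assert [adjacent_child_demo(8, d) for d in 1:6] == [7, 7, 6, 6, 4, 4]
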
const _adjacent_child_ids = [2 2 3 3 5 5;
@@ -134,310 +136,298 @@ const _adjacent_child_ids = [2 2 3 3 5 5;
                             7 7 6 6 4 4]
 
 adjacent_child(child::Int, direction::Int) = _adjacent_child_ids[child, direction]
-
 # For each child position (1 to 8) and a given direction (from 1 to 6), return
 # if neighbor is a sibling
 function has_sibling(child::Int, direction::Int)
-  return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0
+    return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0
 end
-
 # Obtain leaf cells that fulfill a given criterion.
 #
 # The function `f` is passed the cell id of each leaf cell
 # as an argument.
 function filter_leaf_cells(f, t::AbstractTree)
-  filtered = Vector{Int}(undef, length(t))
-  count = 0
-  for cell_id in 1:length(t)
-    if is_leaf(t, cell_id) && f(cell_id)
-      count += 1
-      filtered[count] = cell_id
+    filtered = Vector{Int}(undef, length(t))
+    count = 0
+    for cell_id in 1:length(t)
+        if is_leaf(t, cell_id) && f(cell_id)
+            count += 1
+            filtered[count] = cell_id
+        end
     end
-  end
-  return filtered[1:count]
+    return filtered[1:count]
 end
-
 # Return an array with the ids of all leaf cells
-leaf_cells(t::AbstractTree) = filter_leaf_cells((cell_id)->true, t)
-
+leaf_cells(t::AbstractTree) = filter_leaf_cells((cell_id) -> true, t)
 # Return an array with the ids of all leaf cells for a given rank
 leaf_cells_by_rank(t::AbstractTree, rank) = leaf_cells(t)
-
 # Return an array with the ids of all local leaf cells
 local_leaf_cells(t::AbstractTree) = leaf_cells(t)
-
 # Count the number of leaf cells.
 count_leaf_cells(t::AbstractTree) = length(leaf_cells(t))
-
 @inline function cell_coordinates(t::AbstractTree{NDIMS}, cell) where {NDIMS}
-  SVector(ntuple(d -> t.coordinates[d, cell], Val(NDIMS)))
+    SVector(ntuple(d -> t.coordinates[d, cell], Val(NDIMS)))
 end
 
-@inline function set_cell_coordinates!(t::AbstractTree{NDIMS}, coords, cell) where {NDIMS}
-  for d in 1:NDIMS
-    t.coordinates[d, cell] = coords[d]
-  end
+@inline function set_cell_coordinates!(t::AbstractTree{NDIMS}, coords,
+                                       cell) where {NDIMS}
+    for d in 1:NDIMS
+        t.coordinates[d, cell] = coords[d]
+    end
 end
-
 # Determine if point is located inside cell
 function is_point_in_cell(t::AbstractTree, point_coordinates, cell_id)
-  cell_length = length_at_cell(t, cell_id)
-  cell_coordinates_ = cell_coordinates(t, cell_id)
-  min_coordinates = cell_coordinates_ .- cell_length / 2
-  max_coordinates = cell_coordinates_ .+ cell_length / 2
+    cell_length = length_at_cell(t, cell_id)
+    cell_coordinates_ = cell_coordinates(t, cell_id)
+    min_coordinates = cell_coordinates_ .- cell_length / 2
+    max_coordinates = cell_coordinates_ .+ cell_length / 2
 
-  return all(min_coordinates .<= point_coordinates .<= max_coordinates)
+    return all(min_coordinates .<= point_coordinates .<= max_coordinates)
 end
-
 # Store cell id in each cell to use for post-AMR analysis
 function reset_original_cell_ids!(t::AbstractTree)
-  t.original_cell_ids[1:length(t)] .= 1:length(t)
+    t.original_cell_ids[1:length(t)] .= 1:length(t)
 end
-
 # Efficiently perform uniform refinement up to a given level (works only on mesh with a single cell)
 function refine_uniformly!(t::AbstractTree, max_level)
-  @assert length(t) == 1 "efficient uniform refinement only works for a newly created tree"
-  @assert max_level >= 0 "the uniform refinement level must be non-zero"
+    @assert length(t)==1 "efficient uniform refinement only works for a newly created tree"
+    @assert max_level>=0 "the uniform refinement level must be non-negative"
 
-  # Calculate size of final tree
and resize tree - total_length = 1 - for level in 1:max_level - total_length += n_children_per_cell(t)^level - end - resize!(t, total_length) + # Calculate size of final tree and resize tree + total_length = 1 + for level in 1:max_level + total_length += n_children_per_cell(t)^level + end + resize!(t, total_length) - # Traverse tree to set parent-child relationships - init_children!(t, 1, max_level) + # Traverse tree to set parent-child relationships + init_children!(t, 1, max_level) - # Set all neighbor relationships - init_neighbors!(t, max_level) + # Set all neighbor relationships + init_neighbors!(t, max_level) end - # Recursively initialize children up to level `max_level` in depth-first ordering, starting with # cell `cell_id` and set all information except neighbor relations (see `init_neighbors!`). # # Return the number of offspring of the initialized cell plus one function init_children!(t::AbstractTree, cell_id, max_level) - # Stop recursion if max_level has been reached - if t.levels[cell_id] >= max_level - return 1 - else - # Initialize each child cell, counting the total number of offspring - n_offspring = 0 - for child in 1:n_children_per_cell(t) - # Get cell id of child - child_id = cell_id + 1 + n_offspring - - # Initialize child cell (except neighbors) - init_child!(t, cell_id, child, child_id) - - # Recursively initialize child cell - n_offspring += init_children!(t, child_id, max_level) - end + # Stop recursion if max_level has been reached + if t.levels[cell_id] >= max_level + return 1 + else + # Initialize each child cell, counting the total number of offspring + n_offspring = 0 + for child in 1:n_children_per_cell(t) + # Get cell id of child + child_id = cell_id + 1 + n_offspring - return n_offspring + 1 - end -end + # Initialize child cell (except neighbors) + init_child!(t, cell_id, child, child_id) + # Recursively initialize child cell + n_offspring += init_children!(t, child_id, max_level) + end + + return n_offspring + 1 + end +end # Iteratively set all neighbor relations, starting at an initialized level 0 cell. Assume that # parent-child relations have already been initialized (see `init_children!`). 
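
The level-by-level order of this sweep matters because a child's non-sibling neighbor is reached through its parent's neighbor, which must therefore already be set. Here is a minimal 1D sketch of that dependency, assuming nothing from Trixi (`Tree1D` and `set_child_neighbors!` are invented for this illustration and are not Trixi API):

    # Cells store parent/child/neighbor ids in plain arrays;
    # directions are 1 (left) and 2 (right), 0 means "no neighbor".
    struct Tree1D
        parent::Vector{Int}
        children::Matrix{Int}   # 2 x ncells
        neighbors::Matrix{Int}  # 2 x ncells
    end

    function set_child_neighbors!(t::Tree1D, parent_id)
        c1, c2 = t.children[1, parent_id], t.children[2, parent_id]
        # Siblings always neighbor each other across the interior face
        t.neighbors[2, c1] = c2
        t.neighbors[1, c2] = c1
        # Non-sibling neighbors come from the parent's neighbors -- these must
        # already be set, which forces the level-by-level processing order
        left = t.neighbors[1, parent_id]
        if left != 0 && t.children[2, left] != 0
            t.neighbors[1, c1] = t.children[2, left]
            t.neighbors[2, t.children[2, left]] = c1
        end
        right = t.neighbors[2, parent_id]
        if right != 0 && t.children[1, right] != 0
            t.neighbors[2, c2] = t.children[1, right]
            t.neighbors[1, t.children[1, right]] = c2
        end
        return nothing
    end

    # Two coarse neighbor cells (ids 1 and 2), each refined into two children
    # (ids 3/4 and 5/6); the coarse neighbor relation 1 <-> 2 is set up front.
    t = Tree1D([0, 0, 1, 1, 2, 2],
               [3 5 0 0 0 0; 4 6 0 0 0 0],
               [0 1 0 0 0 0; 2 0 0 0 0 0])
    set_child_neighbors!(t, 1)
    set_child_neighbors!(t, 2)
    @assert t.neighbors[2, 4] == 5 && t.neighbors[1, 5] == 4

The children meeting at the coarse interface (cells 4 and 5) are connected only because the parents' neighbor relation existed first. The actual `AbstractTree` implementation follows.
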
-function init_neighbors!(t::AbstractTree, max_level=maximum_level(t))
-  @assert all(n >= 0 for n in t.neighbor_ids[:, 1]) "level 0 cell neighbors must be initialized"
-
-  # Initialize neighbors level by level
-  for level in 1:max_level
-    # Walk entire tree, starting from level 0 cell
-    for cell_id in 1:length(t)
-      # Skip cells whose immediate children are already initialized *or* whose level is too high for this round
-      if t.levels[cell_id] != level - 1
-        continue
-      end
-
-      # Iterate over children and set neighbor information
-      for child in 1:n_children_per_cell(t)
-        child_id = t.child_ids[child, cell_id]
-        init_child_neighbors!(t, cell_id, child, child_id)
-      end
+function init_neighbors!(t::AbstractTree, max_level = maximum_level(t))
+    @assert all(n >= 0 for n in t.neighbor_ids[:, 1]) "level 0 cell neighbors must be initialized"
+
+    # Initialize neighbors level by level
+    for level in 1:max_level
+        # Walk entire tree, starting from level 0 cell
+        for cell_id in 1:length(t)
+            # Skip cells whose immediate children are already initialized *or* whose level is too high for this round
+            if t.levels[cell_id] != level - 1
+                continue
+            end
+
+            # Iterate over children and set neighbor information
+            for child in 1:n_children_per_cell(t)
+                child_id = t.child_ids[child, cell_id]
+                init_child_neighbors!(t, cell_id, child, child_id)
+            end
+        end
     end
-  end
-  return nothing
+    return nothing
 end
-
 # Initialize the neighbors of child cell `child_id` based on parent cell `cell_id`
 function init_child_neighbors!(t::AbstractTree, cell_id, child, child_id)
-  t.neighbor_ids[:, child_id] .= zero(eltype(t.neighbor_ids))
-  for direction in eachdirection(t)
-    # If neighbor is a sibling, establish one-sided connectivity
-    # Note: two-sided is not necessary, as each sibling will do this
-    if has_sibling(child, direction)
-      adjacent = adjacent_child(child, direction)
-      neighbor_id = t.child_ids[adjacent, cell_id]
-
-      t.neighbor_ids[direction, child_id] = neighbor_id
-      continue
-    end
+    t.neighbor_ids[:, child_id] .= zero(eltype(t.neighbor_ids))
+    for direction in eachdirection(t)
+        # If neighbor is a sibling, establish one-sided connectivity
+        # Note: two-sided is not necessary, as each sibling will do this
+        if has_sibling(child, direction)
+            adjacent = adjacent_child(child, direction)
+            neighbor_id = t.child_ids[adjacent, cell_id]
 
-    # Skip if original cell does have no neighbor in direction
-    if !has_neighbor(t, cell_id, direction)
-      continue
-    end
+            t.neighbor_ids[direction, child_id] = neighbor_id
+            continue
+        end
 
-    # Otherwise, check if neighbor has children - if not, skip again
-    neighbor_id = t.neighbor_ids[direction, cell_id]
-    if !has_children(t, neighbor_id)
-      continue
-    end
+        # Skip if original cell has no neighbor in this direction
+        if !has_neighbor(t, cell_id, direction)
+            continue
+        end
 
-    # Check if neighbor has corresponding child and if yes, establish connectivity
-    adjacent = adjacent_child(child, direction)
-    if has_child(t, neighbor_id, adjacent)
-      neighbor_child_id = t.child_ids[adjacent, neighbor_id]
-      opposite = opposite_direction(direction)
+        # Otherwise, check if neighbor has children - if not, skip again
+        neighbor_id = t.neighbor_ids[direction, cell_id]
+        if !has_children(t, neighbor_id)
+            continue
+        end
+
+        # Check if neighbor has corresponding child and if yes, establish connectivity
+        adjacent = adjacent_child(child, direction)
+        if has_child(t, neighbor_id, adjacent)
+            neighbor_child_id = t.child_ids[adjacent, neighbor_id]
+            opposite = opposite_direction(direction)
 
-      t.neighbor_ids[direction, child_id] = neighbor_child_id
-      t.neighbor_ids[opposite, neighbor_child_id] = child_id
+            t.neighbor_ids[direction, child_id] = neighbor_child_id
+            t.neighbor_ids[opposite, neighbor_child_id] = child_id
+        end
     end
-  end
-  return nothing
+    return nothing
 end
-
 # Refine given cells without rebalancing tree.
 #
 # Note: After a call to this method the tree may be unbalanced!
-function refine_unbalanced!(t::AbstractTree, cell_ids, sorted_unique_cell_ids=sort(unique(cell_ids)))
-  # Store actual ids refined cells (shifted due to previous insertions)
-  refined = zeros(Int, length(cell_ids))
-
-  # Loop over all cells that are to be refined
-  for (count, original_cell_id) in enumerate(sorted_unique_cell_ids)
-    # Determine actual cell id, taking into account previously inserted cells
-    n_children = n_children_per_cell(t)
-    cell_id = original_cell_id + (count - 1) * n_children
-    refined[count] = cell_id
-
-    @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined"
-
-    # Insert new cells directly behind parent (depth-first)
-    insert!(t, cell_id + 1, n_children)
-
-    # Flip sign of refined cell such that we can easily find it later
-    t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id]
-
-    # Initialize child cells (except neighbors)
-    for child in 1:n_children
-      child_id = cell_id + child
-      init_child!(t, cell_id, child, child_id)
-    end
+function refine_unbalanced!(t::AbstractTree, cell_ids,
+                            sorted_unique_cell_ids = sort(unique(cell_ids)))
+    # Store actual ids of refined cells (shifted due to previous insertions)
+    refined = zeros(Int, length(cell_ids))
+
+    # Loop over all cells that are to be refined
+    for (count, original_cell_id) in enumerate(sorted_unique_cell_ids)
+        # Determine actual cell id, taking into account previously inserted cells
+        n_children = n_children_per_cell(t)
+        cell_id = original_cell_id + (count - 1) * n_children
+        refined[count] = cell_id
+
+        @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined"
+
+        # Insert new cells directly behind parent (depth-first)
+        insert!(t, cell_id + 1, n_children)
+
+        # Flip sign of refined cell such that we can easily find it later
+        t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id]
+
+        # Initialize child cells (except neighbors)
+        for child in 1:n_children
+            child_id = cell_id + child
+            init_child!(t, cell_id, child, child_id)
+        end
 
-    # Initialize child cells (only neighbors)
-    # This separate loop is required since init_child_neighbors requires initialized parent-child
-    # relationships
-    for child in 1:n_children
-      child_id = cell_id + child
-      init_child_neighbors!(t, cell_id, child, child_id)
+        # Initialize child cells (only neighbors)
+        # This separate loop is required since init_child_neighbors requires initialized parent-child
+        # relationships
+        for child in 1:n_children
+            child_id = cell_id + child
+            init_child_neighbors!(t, cell_id, child, child_id)
+        end
     end
-  end
-  return refined
+    return refined
 end
-
 # Refine entire tree by one level
 function refine!(t::AbstractTree)
-  cells = @trixi_timeit timer() "collect all leaf cells" leaf_cells(t)
-  @trixi_timeit timer() "refine!" refine!(t, cells, cells)
+    cells = @trixi_timeit timer() "collect all leaf cells" leaf_cells(t)
+    @trixi_timeit timer() "refine!" refine!(t, cells, cells)
 end
-
 # Refine given cells and rebalance tree.
 #
 # Note 1: Rebalancing is iterative, i.e., neighboring cells are refined if
 #         otherwise the 2:1 rule would be violated, which can cause more
 #         refinements.
# Note 2: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! -function refine!(t::AbstractTree, cell_ids, sorted_unique_cell_ids=sort(unique(cell_ids))) - # Reset original cell ids such that each cell knows its current id - reset_original_cell_ids!(t) - - # Refine all requested cells - refined = @trixi_timeit timer() "refine_unbalanced!" refine_unbalanced!(t, cell_ids, sorted_unique_cell_ids) - refinement_count = length(refined) - - # Iteratively rebalance the tree until it does not change anymore - while length(refined) > 0 - refined = @trixi_timeit timer() "rebalance!" rebalance!(t, refined) - refinement_count += length(refined) - end - - # Determine list of *original* cell ids that were refined - # Note: original_cell_ids contains the cell_id *before* refinement. At - # refinement, the refined cell's original_cell_ids value has its sign flipped - # to easily find it now. - refined_original_cells = @views( - -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) - - # Check if count of refinement cells matches information in original_cell_ids - @assert refinement_count == length(refined_original_cells) ( - "Mismatch in number of refined cells") - - return refined_original_cells -end +function refine!(t::AbstractTree, cell_ids, + sorted_unique_cell_ids = sort(unique(cell_ids))) + # Reset original cell ids such that each cell knows its current id + reset_original_cell_ids!(t) + + # Refine all requested cells + refined = @trixi_timeit timer() "refine_unbalanced!" refine_unbalanced!(t, cell_ids, + sorted_unique_cell_ids) + refinement_count = length(refined) + + # Iteratively rebalance the tree until it does not change anymore + while length(refined) > 0 + refined = @trixi_timeit timer() "rebalance!" rebalance!(t, refined) + refinement_count += length(refined) + end + + # Determine list of *original* cell ids that were refined + # Note: original_cell_ids contains the cell_id *before* refinement. At + # refinement, the refined cell's original_cell_ids value has its sign flipped + # to easily find it now. + refined_original_cells = @views(-t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) + # Check if count of refinement cells matches information in original_cell_ids + @assert refinement_count==length(refined_original_cells) ("Mismatch in number of refined cells") + + return refined_original_cells +end # Refine all leaf cells with coordinates in a given rectangular box -function refine_box!(t::AbstractTree{NDIMS}, coordinates_min, coordinates_max) where NDIMS - for dim in 1:NDIMS - @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." 
-  end
-
-  # Find all leaf cells within box
-  cells = filter_leaf_cells(t) do cell_id
-    return (all(coordinates_min .< cell_coordinates(t, cell_id)) &&
-            all(coordinates_max .> cell_coordinates(t, cell_id)))
-  end
-
-  # Refine cells
-  refine!(t, cells)
+function refine_box!(t::AbstractTree{NDIMS}, coordinates_min,
+                     coordinates_max) where {NDIMS}
+    for dim in 1:NDIMS
+        @assert coordinates_min[dim]<coordinates_max[dim] "Minimum coordinates are not minimum."
+    end
+
+    # Find all leaf cells within box
+    cells = filter_leaf_cells(t) do cell_id
+        return (all(coordinates_min .< cell_coordinates(t, cell_id)) &&
+                all(coordinates_max .> cell_coordinates(t, cell_id)))
+    end
+
+    # Refine cells
+    refine!(t, cells)
 end
 
 # Convenience method for 1D
 function refine_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real)
-  return refine_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)])
+    return refine_box!(t, [convert(Float64, coordinates_min)],
+                       [convert(Float64, coordinates_max)])
 end
-
 # Refine all leaf cells with coordinates in a given sphere
-function refine_sphere!(t::AbstractTree{NDIMS}, center::SVector{NDIMS}, radius) where NDIMS
-  @assert radius >= 0 "Radius must be positive."
+function refine_sphere!(t::AbstractTree{NDIMS}, center::SVector{NDIMS},
+                        radius) where {NDIMS}
+    @assert radius>=0 "Radius must be non-negative."
 
-  # Find all leaf cells within sphere
-  cells = filter_leaf_cells(t) do cell_id
-    return sum(abs2, cell_coordinates(t, cell_id) - center) < radius^2
-  end
+    # Find all leaf cells within sphere
+    cells = filter_leaf_cells(t) do cell_id
+        return sum(abs2, cell_coordinates(t, cell_id) - center) < radius^2
+    end
 
-  # Refine cells
-  refine!(t, cells)
+    # Refine cells
+    refine!(t, cells)
 end
 
 # Convenience function to allow passing center as a tuple
-function refine_sphere!(t::AbstractTree{NDIMS}, center::NTuple{NDIMS}, radius) where NDIMS
-  refine_sphere!(t, SVector(center), radius)
+function refine_sphere!(t::AbstractTree{NDIMS}, center::NTuple{NDIMS},
+                        radius) where {NDIMS}
+    refine_sphere!(t, SVector(center), radius)
 end
 
 # For the given cell ids, check if neighbors need to be refined to restore a rebalanced tree.
@@ -447,42 +437,41 @@ end
 # created level differences of at most 2. That is, before the previous
 # refinement step, the tree was balanced.
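
The 2:1 rule that the function below restores can be sketched in one line on a 1D row of leaves, assuming levels are stored in a plain vector (`needs_refinement` is a hypothetical helper for this illustration, not Trixi API; the real tree compares face neighbors through its connectivity arrays):

    # Entry i is the refinement level of leaf i, left to right. Face neighbors
    # differing by more than one level break the 2:1 rule, so the coarser one
    # must be refined -- possibly triggering further rounds.
    needs_refinement(levels) = [i for i in 1:(length(levels) - 1)
                                if abs(levels[i] - levels[i + 1]) > 1]

    @assert needs_refinement([1, 2, 2, 1]) == Int[]  # balanced
    @assert needs_refinement([1, 3, 2, 1]) == [1]    # interface 1 violates 2:1
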
function rebalance!(t::AbstractTree, refined_cell_ids)
-  # Create buffer for newly refined cells
-  to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids))
-  count = 0
-
-  # Iterate over cell ids that have previously been refined
-  for cell_id in refined_cell_ids
-    # Go over all potential neighbors of child cell
-    for direction in eachdirection(t)
-      # Continue if refined cell has a neighbor in that direction
-      if has_neighbor(t, cell_id, direction)
-        continue
-      end
-
-      # Continue if refined cell has no coarse neighbor, since that would
-      # mean it there is no neighbor in that direction at all (domain
-      # boundary)
-      if !has_coarse_neighbor(t, cell_id, direction)
-        continue
-      end
-
-      # Otherwise, the coarse neighbor exists and is not refined, thus it must
-      # be marked for refinement
-      coarse_neighbor_id = t.neighbor_ids[direction, t.parent_ids[cell_id]]
-      count += 1
-      to_refine[count] = coarse_neighbor_id
+    # Create buffer for newly refined cells
+    to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids))
+    count = 0
+
+    # Iterate over cell ids that have previously been refined
+    for cell_id in refined_cell_ids
+        # Go over all potential neighbors of child cell
+        for direction in eachdirection(t)
+            # Continue if refined cell has a neighbor in that direction
+            if has_neighbor(t, cell_id, direction)
+                continue
+            end
+
+            # Continue if refined cell has no coarse neighbor, since that would
+            # mean there is no neighbor in that direction at all (domain
+            # boundary)
+            if !has_coarse_neighbor(t, cell_id, direction)
+                continue
+            end
+
+            # Otherwise, the coarse neighbor exists and is not refined, thus it must
+            # be marked for refinement
+            coarse_neighbor_id = t.neighbor_ids[direction, t.parent_ids[cell_id]]
+            count += 1
+            to_refine[count] = coarse_neighbor_id
+        end
     end
-  end
-  # Finally, refine all marked cells...
-  refined = refine_unbalanced!(t, unique(to_refine[1:count]))
+    # Finally, refine all marked cells...
+    refined = refine_unbalanced!(t, unique(to_refine[1:count]))
 
-  # ...and return list of refined cells
-  return refined
+    # ...and return list of refined cells
+    return refined
 end
-
 # Refine given cells without rebalancing tree.
 #
 # Note: After a call to this method the tree may be unbalanced!
@@ -491,19 +480,17 @@ end
 # Wrap single-cell refinements such that `sort(...)` does not complain
 refine_unbalanced!(t::AbstractTree, cell_id::Int) = refine_unbalanced!(t, [cell_id])
-
 # Coarsen entire tree by one level
 function coarsen!(t::AbstractTree)
-  # Special case: if there is only one cell (root), there is nothing to do
-  if length(t) == 1
-    return Int[]
-  end
-
-  # Get list of unique parent ids for all leaf cells
-  parent_ids = unique(t.parent_ids[leaf_cells(t)])
-  coarsen!(t, parent_ids)
-end
+    # Special case: if there is only one cell (root), there is nothing to do
+    if length(t) == 1
+        return Int[]
+    end
+    # Get list of unique parent ids for all leaf cells
+    parent_ids = unique(t.parent_ids[leaf_cells(t)])
+    coarsen!(t, parent_ids)
+end
 # Coarsen given *parent* cells (= these cells must have children who are all
 # leaf cells) while retaining a balanced tree.
@@ -513,165 +500,163 @@ end
 # coarsened without specifically asking for it, these cells will then *not* be
 # coarsened.
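
Both `refine!` above and `coarsen!` below rely on the same in-place marking trick on `original_cell_ids`: flipping the sign marks a cell without any extra storage, and the marked ids are recovered afterwards by filtering for negative entries. A minimal sketch in plain Julia (no Trixi types):

    original_cell_ids = [1, 2, 3, 4, 5]
    for cell_id in (2, 4)  # mark cells 2 and 4 in place
        original_cell_ids[cell_id] = -original_cell_ids[cell_id]
    end
    # Recover the marked ids by filtering for negative values and negating back
    marked = -original_cell_ids[original_cell_ids .< 0]
    @assert marked == [2, 4]
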
function coarsen!(t::AbstractTree, cell_ids::AbstractArray{Int}) - # Return early if array is empty - if length(cell_ids) == 0 - return Int[] - end - - # Reset original cell ids such that each cell knows its current id - reset_original_cell_ids!(t) - - # To maximize the number of cells that may be coarsened, start with the cells at the highest level - sorted_by_level = sort(cell_ids, by = i -> t.levels[i]) - - # Keep track of number of cells that were actually coarsened - n_coarsened = 0 - - # Local function to adjust cell ids after some cells have been removed - function adjust_cell_ids!(cell_ids, coarsened_cell_id, count) - for (id, cell_id) in enumerate(cell_ids) - if cell_id > coarsened_cell_id - cell_ids[id] = cell_id - count - end + # Return early if array is empty + if length(cell_ids) == 0 + return Int[] end - end - # Iterate backwards over cells to coarsen - while true - # Retrieve next cell or quit - if length(sorted_by_level) > 0 - coarse_cell_id = pop!(sorted_by_level) - else - break - end + # Reset original cell ids such that each cell knows its current id + reset_original_cell_ids!(t) - # Ensure that cell has children (violation is an error) - if !has_children(t, coarse_cell_id) - error("cell is leaf and cannot be coarsened to: $coarse_cell_id") - end + # To maximize the number of cells that may be coarsened, start with the cells at the highest level + sorted_by_level = sort(cell_ids, by = i -> t.levels[i]) - # Ensure that all child cells are leaf cells (violation is an error) - for child in 1:n_children_per_cell(t) - if has_child(t, coarse_cell_id, child) - if !is_leaf(t, t.child_ids[child, coarse_cell_id]) - error("cell $coarse_cell_id has child cell at position $child that is not a leaf cell") + # Keep track of number of cells that were actually coarsened + n_coarsened = 0 + + # Local function to adjust cell ids after some cells have been removed + function adjust_cell_ids!(cell_ids, coarsened_cell_id, count) + for (id, cell_id) in enumerate(cell_ids) + if cell_id > coarsened_cell_id + cell_ids[id] = cell_id - count + end end - end end - # Check if coarse cell has refined neighbors that would prevent coarsening - skip = false - # Iterate over all children (which are to be removed) - for child in 1:n_children_per_cell(t) - # Continue if child does not exist - if !has_child(t, coarse_cell_id, child) - continue - end - child_id = t.child_ids[child, coarse_cell_id] - - # Go over all neighbors of child cell. If it has a neighbor that is *not* - # a sibling and that is not a leaf cell, we cannot coarsen its parent - # without creating an unbalanced tree. 
- for direction in eachdirection(t) - # Continue if neighbor would be a sibling - if has_sibling(child, direction) - continue + # Iterate backwards over cells to coarsen + while true + # Retrieve next cell or quit + if length(sorted_by_level) > 0 + coarse_cell_id = pop!(sorted_by_level) + else + break end - # Continue if child cell has no neighbor in that direction - if !has_neighbor(t, child_id, direction) - continue + # Ensure that cell has children (violation is an error) + if !has_children(t, coarse_cell_id) + error("cell is leaf and cannot be coarsened to: $coarse_cell_id") end - neighbor_id = t.neighbor_ids[direction, child_id] - if !has_children(t, neighbor_id) - continue + # Ensure that all child cells are leaf cells (violation is an error) + for child in 1:n_children_per_cell(t) + if has_child(t, coarse_cell_id, child) + if !is_leaf(t, t.child_ids[child, coarse_cell_id]) + error("cell $coarse_cell_id has child cell at position $child that is not a leaf cell") + end + end end - # If neighbor is not a sibling, exists, and has children, do not coarsen - skip = true - break - end - end - # Skip if a neighboring cell prevents coarsening - if skip - continue - end + # Check if coarse cell has refined neighbors that would prevent coarsening + skip = false + # Iterate over all children (which are to be removed) + for child in 1:n_children_per_cell(t) + # Continue if child does not exist + if !has_child(t, coarse_cell_id, child) + continue + end + child_id = t.child_ids[child, coarse_cell_id] + + # Go over all neighbors of child cell. If it has a neighbor that is *not* + # a sibling and that is not a leaf cell, we cannot coarsen its parent + # without creating an unbalanced tree. + for direction in eachdirection(t) + # Continue if neighbor would be a sibling + if has_sibling(child, direction) + continue + end + + # Continue if child cell has no neighbor in that direction + if !has_neighbor(t, child_id, direction) + continue + end + neighbor_id = t.neighbor_ids[direction, child_id] + + if !has_children(t, neighbor_id) + continue + end + + # If neighbor is not a sibling, exists, and has children, do not coarsen + skip = true + break + end + end + # Skip if a neighboring cell prevents coarsening + if skip + continue + end - # Flip sign of cell to be coarsened to such that we can easily find it - t.original_cell_ids[coarse_cell_id] = -t.original_cell_ids[coarse_cell_id] + # Flip sign of cell to be coarsened to such that we can easily find it + t.original_cell_ids[coarse_cell_id] = -t.original_cell_ids[coarse_cell_id] - # If a coarse cell has children that are all leaf cells, they must follow - # immediately due to depth-first ordering of the tree - count = n_children(t, coarse_cell_id) - @assert count == n_children_per_cell(t) "cell $coarse_cell_id does not have all child cells" - remove_shift!(t, coarse_cell_id + 1, coarse_cell_id + count) + # If a coarse cell has children that are all leaf cells, they must follow + # immediately due to depth-first ordering of the tree + count = n_children(t, coarse_cell_id) + @assert count==n_children_per_cell(t) "cell $coarse_cell_id does not have all child cells" + remove_shift!(t, coarse_cell_id + 1, coarse_cell_id + count) - # Take into account shifts in tree that alter cell ids - adjust_cell_ids!(sorted_by_level, coarse_cell_id, count) + # Take into account shifts in tree that alter cell ids + adjust_cell_ids!(sorted_by_level, coarse_cell_id, count) - # Keep track of number of coarsened cells - n_coarsened += 1 - end + # Keep track of number of coarsened cells + n_coarsened += 1 + end - # Determine list of *original* cell ids that were coarsened to - # Note: original_cell_ids contains the cell_id *before* coarsening. At - # coarsening, the coarsened parent cell's original_cell_ids value has its sign flipped - # to easily find it now. - @views coarsened_original_cells = ( - -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) + # Determine list of *original* cell ids that were coarsened to + # Note: original_cell_ids contains the cell_id *before* coarsening. At + # coarsening, the coarsened parent cell's original_cell_ids value has its sign flipped + # to easily find it now. + @views coarsened_original_cells = (-t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) - # Check if count of coarsened cells matches information in original_cell_ids - @assert n_coarsened == length(coarsened_original_cells) ( - "Mismatch in number of coarsened cells") + # Check if count of coarsened cells matches information in original_cell_ids + @assert n_coarsened==length(coarsened_original_cells) ("Mismatch in number of coarsened cells") - return coarsened_original_cells + return coarsened_original_cells end # Wrap single-cell coarsening such that `sort(...)` does not complain coarsen!(t::AbstractTree, cell_id::Int) = coarsen!(t::AbstractTree, [cell_id]) - # Coarsen all viable parent cells with coordinates in a given rectangular box function coarsen_box!(t::AbstractTree{NDIMS}, coordinates_min::AbstractArray{Float64}, - coordinates_max::AbstractArray{Float64}) where NDIMS - for dim in 1:NDIMS - @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." - end - - # Find all leaf cells within box - leaves = filter_leaf_cells(t) do cell_id - return (all(coordinates_min .< cell_coordinates(t, cell_id)) && - all(coordinates_max .> cell_coordinates(t, cell_id))) - end - - # Get list of unique parent ids for all leaf cells - parent_ids = unique(t.parent_ids[leaves]) - - # Filter parent ids to be within box - parents = filter(parent_ids) do cell_id - return (all(coordinates_min .< cell_coordinates(t, cell_id)) && - all(coordinates_max .> cell_coordinates(t, cell_id))) - end - - # Coarsen cells - coarsen!(t, parents) + coordinates_max::AbstractArray{Float64}) where {NDIMS} + for dim in 1:NDIMS + @assert coordinates_min[dim]<coordinates_max[dim] "Minimum coordinates are not minimum." + end + + # Find all leaf cells within box + leaves = filter_leaf_cells(t) do cell_id + return (all(coordinates_min .< cell_coordinates(t, cell_id)) && + all(coordinates_max .> cell_coordinates(t, cell_id))) + end + + # Get list of unique parent ids for all leaf cells + parent_ids = unique(t.parent_ids[leaves]) + + # Filter parent ids to be within box + parents = filter(parent_ids) do cell_id + return (all(coordinates_min .< cell_coordinates(t, cell_id)) && + all(coordinates_max .> cell_coordinates(t, cell_id))) + end + + # Coarsen cells + coarsen!(t, parents) end # Convenience method for 1D function coarsen_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real) - return coarsen_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) + return coarsen_box!(t, [convert(Float64, coordinates_min)], + [convert(Float64, coordinates_max)]) end - # Return coordinates of a child cell based on its relative position to the parent.
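# For example (illustrative numbers): with parent_coordinates = (0.0, 0.0) and
# parent_length = 2.0, each child has child_length = 1.0, and a child whose
# child_sign(child, d) is -1 in both dimensions is centered at
# (0.0 - 1.0/2, 0.0 - 1.0/2) = (-0.5, -0.5).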
-function child_coordinates(::AbstractTree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS - # Calculate length of child cells - child_length = parent_length / 2 - return SVector(ntuple(d -> parent_coordinates[d] + child_sign(child, d) * child_length / 2, Val(NDIMS))) +function child_coordinates(::AbstractTree{NDIMS}, parent_coordinates, + parent_length::Number, child::Int) where {NDIMS} + # Calculate length of child cells + child_length = parent_length / 2 + return SVector(ntuple(d -> parent_coordinates[d] + + child_sign(child, d) * child_length / 2, Val(NDIMS))) end - # Reset range of cells to values that are prone to cause errors as soon as they are used. # # Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. @@ -679,121 +664,116 @@ end invalidate!(t::AbstractTree, id::Int) = invalidate!(t, id, id) invalidate!(t::AbstractTree) = invalidate!(t, 1, length(t)) - # Delete connectivity with parents/children/neighbors before cells are erased function delete_connectivity!(t::AbstractTree, first::Int, last::Int) - @assert first > 0 - @assert first <= last - @assert last <= t.capacity + 1 - - # Iterate over all cells - for cell_id in first:last - # Delete connectivity from parent cell - if has_parent(t, cell_id) - parent_id = t.parent_ids[cell_id] - for child in 1:n_children_per_cell(t) - if t.child_ids[child, parent_id] == cell_id - t.child_ids[child, parent_id] = 0 - break + @assert first > 0 + @assert first <= last + @assert last <= t.capacity + 1 + + # Iterate over all cells + for cell_id in first:last + # Delete connectivity from parent cell + if has_parent(t, cell_id) + parent_id = t.parent_ids[cell_id] + for child in 1:n_children_per_cell(t) + if t.child_ids[child, parent_id] == cell_id + t.child_ids[child, parent_id] = 0 + break + end + end end - end - end - # Delete connectivity from child cells - for child in 1:n_children_per_cell(t) - if has_child(t, cell_id, child) - t.parent_ids[t._child_ids[child, cell_id]] = 0 - end - end + # Delete connectivity from child cells + for child in 1:n_children_per_cell(t) + if has_child(t, cell_id, child) + t.parent_ids[t._child_ids[child, cell_id]] = 0 + end + end - # Delete connectivity from neighboring cells - for direction in eachdirection(t) - if has_neighbor(t, cell_id, direction) - t.neighbor_ids[opposite_direction(direction), t.neighbor_ids[direction, cell_id]] = 0 - end + # Delete connectivity from neighboring cells + for direction in eachdirection(t) + if has_neighbor(t, cell_id, direction) + t.neighbor_ids[opposite_direction(direction), t.neighbor_ids[direction, cell_id]] = 0 + end + end end - end end - # Move connectivity with parents/children/neighbors after cells have been moved function move_connectivity!(t::AbstractTree, first::Int, last::Int, destination::Int) - @assert first > 0 - @assert first <= last - @assert last <= t.capacity + 1 - @assert destination > 0 - @assert destination <= t.capacity + 1 - - # Strategy - # 1) Loop over moved cells (at target location) - # 2) Check if parent/children/neighbors connections are to a cell that was moved - # a) if cell was moved: apply offset to current cell - # b) if cell was not moved: go to connected cell and update connectivity there - - offset = destination - first - has_moved(n) = (first <= n <= last) - - for source in first:last - target = source + offset - - # Update parent - if has_parent(t, target) - # Get parent cell - parent_id = t.parent_ids[target] - if has_moved(parent_id) - # If parent itself was moved, just 
update parent id accordingly - t.parent_ids[target] += offset - else - # If parent was not moved, update its corresponding child id - for child in 1:n_children_per_cell(t) - if t.child_ids[child, parent_id] == source - t.child_ids[child, parent_id] = target - end + @assert first > 0 + @assert first <= last + @assert last <= t.capacity + 1 + @assert destination > 0 + @assert destination <= t.capacity + 1 + + # Strategy + # 1) Loop over moved cells (at target location) + # 2) Check if parent/children/neighbors connections are to a cell that was moved + # a) if cell was moved: apply offset to current cell + # b) if cell was not moved: go to connected cell and update connectivity there + + offset = destination - first + has_moved(n) = (first <= n <= last) + + for source in first:last + target = source + offset + + # Update parent + if has_parent(t, target) + # Get parent cell + parent_id = t.parent_ids[target] + if has_moved(parent_id) + # If parent itself was moved, just update parent id accordingly + t.parent_ids[target] += offset + else + # If parent was not moved, update its corresponding child id + for child in 1:n_children_per_cell(t) + if t.child_ids[child, parent_id] == source + t.child_ids[child, parent_id] = target + end + end + end end - end - end - # Update children - for child in 1:n_children_per_cell(t) - if has_child(t, target, child) - # Get child cell - child_id = t.child_ids[child, target] - if has_moved(child_id) - # If child itself was moved, just update child id accordingly - t.child_ids[child, target] += offset - else - # If child was not moved, update its parent id - t.parent_ids[child_id] = target + # Update children + for child in 1:n_children_per_cell(t) + if has_child(t, target, child) + # Get child cell + child_id = t.child_ids[child, target] + if has_moved(child_id) + # If child itself was moved, just update child id accordingly + t.child_ids[child, target] += offset + else + # If child was not moved, update its parent id + t.parent_ids[child_id] = target + end + end end - end - end - # Update neighbors - for direction in eachdirection(t) - if has_neighbor(t, target, direction) - # Get neighbor cell - neighbor_id = t.neighbor_ids[direction, target] - if has_moved(neighbor_id) - # If neighbor itself was moved, just update neighbor id accordingly - t.neighbor_ids[direction, target] += offset - else - # If neighbor was not moved, update its opposing neighbor id - t.neighbor_ids[opposite_direction(direction), neighbor_id] = target + # Update neighbors + for direction in eachdirection(t) + if has_neighbor(t, target, direction) + # Get neighbor cell + neighbor_id = t.neighbor_ids[direction, target] + if has_moved(neighbor_id) + # If neighbor itself was moved, just update neighbor id accordingly + t.neighbor_ids[direction, target] += offset + else + # If neighbor was not moved, update its opposing neighbor id + t.neighbor_ids[opposite_direction(direction), neighbor_id] = target + end + end end - end end - end end - # Raw copy operation for ranges of cells. 
# # This method is used by the higher-level copy operations for AbstractContainer # function raw_copy!(target::AbstractTree, source::AbstractTree, first::Int, last::Int, destination::Int) end - # Reset data structures by recreating all internal storage containers and invalidating all elements # function reset_data_structures!(t::AbstractTree{NDIMS}) where NDIMS end - end # @muladd diff --git a/src/meshes/dgmulti_meshes.jl b/src/meshes/dgmulti_meshes.jl index c41f03abcbf..7ae7c0f904e 100644 --- a/src/meshes/dgmulti_meshes.jl +++ b/src/meshes/dgmulti_meshes.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent """ DGMultiMesh{NDIMS, ...} @@ -12,33 +13,39 @@ dispatchable type. This is intended to store geometric data and connectivities for any type of mesh (Cartesian, affine, curved, structured/unstructured). """ struct DGMultiMesh{NDIMS, MeshType, MeshDataT <: MeshData{NDIMS}, BoundaryFaceT} - md::MeshDataT - boundary_faces::BoundaryFaceT + md::MeshDataT + boundary_faces::BoundaryFaceT end # enable use of @set and setproperties(...) for DGMultiMesh -ConstructionBase.constructorof(::Type{DGMultiMesh{T1, T2, T3, T4}}) where {T1, T2, T3, T4} = DGMultiMesh{T1, T2, T3, T4} +function ConstructionBase.constructorof(::Type{DGMultiMesh{T1, T2, T3, T4}}) where {T1, + T2, + T3, + T4} + DGMultiMesh{T1, T2, T3, T4} +end Base.ndims(::DGMultiMesh{NDIMS}) where {NDIMS} = NDIMS function Base.show(io::IO, mesh::DGMultiMesh{NDIMS, MeshType}) where {NDIMS, MeshType} - @nospecialize mesh # reduce precompilation time - print(io, "$MeshType DGMultiMesh with NDIMS = $NDIMS.") + @nospecialize mesh # reduce precompilation time + print(io, "$MeshType DGMultiMesh with NDIMS = $NDIMS.") end -function Base.show(io::IO, ::MIME"text/plain", mesh::DGMultiMesh{NDIMS, MeshType}) where {NDIMS, MeshType} - @nospecialize mesh # reduce precompilation time - if get(io, :compact, false) - show(io, mesh) - else - summary_header(io, "DGMultiMesh{$NDIMS, $MeshType}, ") - summary_line(io, "number of elements", mesh.md.num_elements) - summary_line(io, "number of boundaries", length(mesh.boundary_faces)) - for (boundary_name, faces) in mesh.boundary_faces - summary_line(increment_indent(io), "nfaces on $boundary_name", length(faces)) +function Base.show(io::IO, ::MIME"text/plain", + mesh::DGMultiMesh{NDIMS, MeshType}) where {NDIMS, MeshType} + @nospecialize mesh # reduce precompilation time + if get(io, :compact, false) + show(io, mesh) + else + summary_header(io, "DGMultiMesh{$NDIMS, $MeshType}, ") + summary_line(io, "number of elements", mesh.md.num_elements) + summary_line(io, "number of boundaries", length(mesh.boundary_faces)) + for (boundary_name, faces) in mesh.boundary_faces + summary_line(increment_indent(io), "nfaces on $boundary_name", + length(faces)) + end + summary_footer(io) end - summary_footer(io) - end end - end # @muladd diff --git a/src/meshes/face_interpolant.jl b/src/meshes/face_interpolant.jl index be2f2ddbd76..201cef9a062 100644 --- a/src/meshes/face_interpolant.jl +++ b/src/meshes/face_interpolant.jl @@ -3,50 +3,52 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # CurvedFace{RealT<:Real} # # Contains the data needed to represent a curved face with data points (x,y,z) as a Lagrange polynomial # interpolant written in barycentric form at a given set of nodes.
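# As background (the standard second, "barycentric", form of Lagrange
# interpolation, stated here for orientation rather than taken from this file):
# given nodes x_j and weights w_j = 1 / prod_{k != j} (x_j - x_k), the
# interpolant of values f_j evaluated at a point x is
#
#     p(x) = sum_j (w_j / (x - x_j)) * f_j  /  sum_j (w_j / (x - x_j)),
#
# with p(x_j) = f_j taken directly when x coincides with a node. The routine
# `lagrange_interpolation_2d` below applies such a 1D interpolation one
# coordinate direction at a time.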
-struct CurvedFace{RealT<:Real} - nodes ::Vector{RealT} - barycentric_weights ::Vector{RealT} - coordinates ::Array{RealT, 3} #[ndims, nnodes, nnodes] +struct CurvedFace{RealT <: Real} + nodes::Vector{RealT} + barycentric_weights::Vector{RealT} + coordinates::Array{RealT, 3} #[ndims, nnodes, nnodes] end - # evaluate the Gamma face interpolant at a particular point s = (s_1, s_2) and return the (x,y,z) coordinate function evaluate_at(s, boundary_face::CurvedFace) - - @unpack nodes, barycentric_weights, coordinates = boundary_face - - x_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, view(coordinates, 1, :, :), - barycentric_weights) - y_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, view(coordinates, 2, :, :), - barycentric_weights) - z_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, view(coordinates, 3, :, :), - barycentric_weights) - - return x_coordinate_at_s_on_boundary_face, - y_coordinate_at_s_on_boundary_face, - z_coordinate_at_s_on_boundary_face + @unpack nodes, barycentric_weights, coordinates = boundary_face + + x_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, + view(coordinates, 1, + :, :), + barycentric_weights) + y_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, + view(coordinates, 2, + :, :), + barycentric_weights) + z_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, + view(coordinates, 3, + :, :), + barycentric_weights) + + return x_coordinate_at_s_on_boundary_face, + y_coordinate_at_s_on_boundary_face, + z_coordinate_at_s_on_boundary_face end - # Calculate a 2D Lagrange interpolating polynomial in barycentric 2 form # of a function f(x,y) at a given coordinate (x,y) for a given node distribution. function lagrange_interpolation_2d(x, nodes, function_values, barycentric_weights) - - f_intermediate = zeros(eltype(function_values), length(nodes)) - for j in eachindex(nodes) - f_intermediate[j] = lagrange_interpolation(x[2], nodes, view(function_values, j, :), - barycentric_weights) - end - point_value = lagrange_interpolation(x[1], nodes, f_intermediate, barycentric_weights) - - return point_value + f_intermediate = zeros(eltype(function_values), length(nodes)) + for j in eachindex(nodes) + f_intermediate[j] = lagrange_interpolation(x[2], nodes, + view(function_values, j, :), + barycentric_weights) + end + point_value = lagrange_interpolation(x[1], nodes, f_intermediate, + barycentric_weights) + + return point_value end - - end # @muladd diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl index beef5341e26..b9c462fa15a 100644 --- a/src/meshes/mesh_io.jl +++ b/src/meshes/mesh_io.jl @@ -3,463 +3,464 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Save current mesh with some context information as an HDF5 file. 
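# For illustration, the attributes written by the functions below can be read
# back with HDF5.jl (a minimal sketch; the path "out/mesh.h5" is hypothetical,
# while `h5open`, `attributes`, and `read` are the HDF5.jl functions already
# used in this file, and the attribute names match those written for TreeMesh):
using HDF5
h5open("out/mesh.h5", "r") do file
    mesh_type = read(attributes(file)["mesh_type"])  # e.g. "TreeMesh"
    n_cells = read(attributes(file)["n_cells"])      # total number of tree cells
    levels = read(file["levels"])                    # refinement level per cell
    println(mesh_type, " with ", n_cells, " cells")
end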
-function save_mesh_file(mesh::Union{TreeMesh, P4estMesh}, output_directory, timestep=0) - save_mesh_file(mesh, output_directory, timestep, mpi_parallel(mesh)) +function save_mesh_file(mesh::Union{TreeMesh, P4estMesh}, output_directory, + timestep = 0) + save_mesh_file(mesh, output_directory, timestep, mpi_parallel(mesh)) end function save_mesh_file(mesh::TreeMesh, output_directory, timestep, mpi_parallel::False) - # Create output directory (if it does not exist) - mkpath(output_directory) + # Create output directory (if it does not exist) + mkpath(output_directory) - # Determine file name based on existence of meaningful time step - if timestep > 0 - filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) - else - filename = joinpath(output_directory, "mesh.h5") - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - n_cells = length(mesh.tree) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["n_cells"] = n_cells - attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) - attributes(file)["minimum_level"] = minimum_level(mesh.tree) - attributes(file)["maximum_level"] = maximum_level(mesh.tree) - attributes(file)["center_level_0"] = mesh.tree.center_level_0 - attributes(file)["length_level_0"] = mesh.tree.length_level_0 - attributes(file)["periodicity"] = collect(mesh.tree.periodicity) - - # Add tree data - file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] - file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] - file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] - file["levels"] = @view mesh.tree.levels[1:n_cells] - file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] - end - - return filename + # Determine file name based on existence of meaningful time step + if timestep > 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) + else + filename = joinpath(output_directory, "mesh.h5") + end + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + n_cells = length(mesh.tree) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["n_cells"] = n_cells + attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) + attributes(file)["minimum_level"] = minimum_level(mesh.tree) + attributes(file)["maximum_level"] = maximum_level(mesh.tree) + attributes(file)["center_level_0"] = mesh.tree.center_level_0 + attributes(file)["length_level_0"] = mesh.tree.length_level_0 + attributes(file)["periodicity"] = collect(mesh.tree.periodicity) + + # Add tree data + file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] + file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] + file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] + file["levels"] = @view mesh.tree.levels[1:n_cells] + file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] + end + + return filename end # Save current mesh with some context information as an HDF5 file. 
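# The parallel method below follows a common MPI output idiom: every rank
# computes the same file name, but only the root rank touches the file system.
# A standalone sketch of the idiom using MPI.jl directly (illustrative only,
# not Trixi code):
using MPI
MPI.Init()
filename = "mesh.h5"  # identical on every rank
if MPI.Comm_rank(MPI.COMM_WORLD) == 0
    # ... write the HDF5 file here, root rank only ...
end
# all ranks continue with (and return) `filename`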
function save_mesh_file(mesh::TreeMesh, output_directory, timestep, mpi_parallel::True) - # Create output directory (if it does not exist) - mpi_isroot() && mkpath(output_directory) + # Create output directory (if it does not exist) + mpi_isroot() && mkpath(output_directory) - # Determine file name based on existence of meaningful time step - if timestep >= 0 - filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) - else - filename = joinpath(output_directory, "mesh.h5") - end + # Determine file name based on existence of meaningful time step + if timestep >= 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) + else + filename = joinpath(output_directory, "mesh.h5") + end + + # Since the mesh is replicated on all ranks, only save from MPI root + if !mpi_isroot() + return filename + end + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + n_cells = length(mesh.tree) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["n_cells"] = n_cells + attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) + attributes(file)["minimum_level"] = minimum_level(mesh.tree) + attributes(file)["maximum_level"] = maximum_level(mesh.tree) + attributes(file)["center_level_0"] = mesh.tree.center_level_0 + attributes(file)["length_level_0"] = mesh.tree.length_level_0 + attributes(file)["periodicity"] = collect(mesh.tree.periodicity) + + # Add tree data + file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] + file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] + file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] + file["levels"] = @view mesh.tree.levels[1:n_cells] + file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] + end - # Since the mesh is replicated on all ranks, only save from MPI root - if !mpi_isroot() return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - n_cells = length(mesh.tree) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["n_cells"] = n_cells - attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) - attributes(file)["minimum_level"] = minimum_level(mesh.tree) - attributes(file)["maximum_level"] = maximum_level(mesh.tree) - attributes(file)["center_level_0"] = mesh.tree.center_level_0 - attributes(file)["length_level_0"] = mesh.tree.length_level_0 - attributes(file)["periodicity"] = collect(mesh.tree.periodicity) - - # Add tree data - file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] - file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] - file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] - file["levels"] = @view mesh.tree.levels[1:n_cells] - file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] - end - - return filename end - # Does not save the mesh itself to an HDF5 file. Instead saves important attributes # of the mesh, like its size and the type of boundary mapping function. 
# Then, within Trixi2Vtk, the StructuredMesh and its node coordinates are reconstructed from # these attributes for plotting purposes function save_mesh_file(mesh::StructuredMesh, output_directory) - # Create output directory (if it does not exist) - mkpath(output_directory) + # Create output directory (if it does not exist) + mkpath(output_directory) - filename = joinpath(output_directory, "mesh.h5") + filename = joinpath(output_directory, "mesh.h5") - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["size"] = collect(size(mesh)) - attributes(file)["mapping"] = mesh.mapping_as_string - end + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["size"] = collect(size(mesh)) + attributes(file)["mapping"] = mesh.mapping_as_string + end - return filename + return filename end - # Does not save the mesh itself to an HDF5 file. Instead saves important attributes # of the mesh, like its size and the corresponding `.mesh` file used to construct the mesh. # Then, within Trixi2Vtk, the UnstructuredMesh2D and its node coordinates are reconstructed # from these attributes for plotting purposes function save_mesh_file(mesh::UnstructuredMesh2D, output_directory) - # Create output directory (if it does not exist) - mkpath(output_directory) - - filename = joinpath(output_directory, "mesh.h5") - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["size"] = length(mesh) - attributes(file)["mesh_filename"] = mesh.filename - attributes(file)["periodicity"] = collect(mesh.periodicity) - end - - return filename -end + # Create output directory (if it does not exist) + mkpath(output_directory) + + filename = joinpath(output_directory, "mesh.h5") + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["size"] = length(mesh) + attributes(file)["mesh_filename"] = mesh.filename + attributes(file)["periodicity"] = collect(mesh.periodicity) + end + + return filename +end # Does not save the mesh itself to an HDF5 file. Instead saves important attributes # of the mesh, like its size and the type of boundary mapping function. # Then, within Trixi2Vtk, the P4estMesh and its node coordinates are reconstructed from # these attributes for plotting purposes -function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_parallel::False) - # Create output directory (if it does not exist) - mkpath(output_directory) - - # Determine file name based on existence of meaningful time step - if timestep > 0 - filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) - p4est_filename = @sprintf("p4est_data_%06d", timestep) - else - filename = joinpath(output_directory, "mesh.h5") - p4est_filename = "p4est_data" - end +function save_mesh_file(mesh::P4estMesh, output_directory, timestep, + mpi_parallel::False) + # Create output directory (if it does not exist) + mkpath(output_directory) - p4est_file = joinpath(output_directory, p4est_filename) + # Determine file name based on existence of meaningful time step + if timestep > 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) + p4est_filename = @sprintf("p4est_data_%06d", timestep) + else + filename = joinpath(output_directory, "mesh.h5") + p4est_filename = "p4est_data" + end - # Save the complete connectivity and `p4est` data to disk. - save_p4est!(p4est_file, mesh.p4est) + p4est_file = joinpath(output_directory, p4est_filename) - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["p4est_file"] = p4est_filename + # Save the complete connectivity and `p4est` data to disk. + save_p4est!(p4est_file, mesh.p4est) - file["tree_node_coordinates"] = mesh.tree_node_coordinates - file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes - # to increase the runtime performance - # but HDF5 can only handle plain arrays - file["boundary_names"] = mesh.boundary_names .|> String - end + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["p4est_file"] = p4est_filename - return filename + file["tree_node_coordinates"] = mesh.tree_node_coordinates + file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes + # to increase the runtime performance + # but HDF5 can only handle plain arrays + file["boundary_names"] = mesh.boundary_names .|> String + end + + return filename end function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_parallel::True) - # Create output directory (if it does not exist) - mpi_isroot() && mkpath(output_directory) - - # Determine file name based on existence of meaningful time step - if timestep > 0 - filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) - p4est_filename = @sprintf("p4est_data_%06d", timestep) - else - filename = joinpath(output_directory, "mesh.h5") - p4est_filename = "p4est_data" - end + # Create output directory (if it does not exist) + mpi_isroot() && mkpath(output_directory) - p4est_file = joinpath(output_directory, p4est_filename) + # Determine file name based on existence of meaningful time step + if timestep > 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) + p4est_filename = @sprintf("p4est_data_%06d", timestep) + else + filename = joinpath(output_directory, "mesh.h5") + p4est_filename = "p4est_data" + end + + p4est_file = joinpath(output_directory,
p4est_filename) - # Save the complete connectivity/p4est data to disk. - save_p4est!(p4est_file, mesh.p4est) + # Save the complete connectivity/p4est data to disk. + save_p4est!(p4est_file, mesh.p4est) + + # Since the mesh attributes are replicated on all ranks, only save from MPI root + if !mpi_isroot() + return filename + end + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["p4est_file"] = p4est_filename + + file["tree_node_coordinates"] = mesh.tree_node_coordinates + file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes + # to increase the runtime performance + # but HDF5 can only handle plain arrays + file["boundary_names"] = mesh.boundary_names .|> String + end - # Since the mesh attributes are replicated on all ranks, only save from MPI root - if !mpi_isroot() return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["p4est_file"] = p4est_filename - - file["tree_node_coordinates"] = mesh.tree_node_coordinates - file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes - # to increase the runtime performance - # but HDF5 can only handle plain arrays - file["boundary_names"] = mesh.boundary_names .|> String - end - - return filename end - """ load_mesh(restart_file::AbstractString; n_cells_max) Load the mesh from the `restart_file`. """ -function load_mesh(restart_file::AbstractString; n_cells_max=0, RealT=Float64) - if mpi_isparallel() - mesh_file = get_restart_mesh_filename(restart_file, True()) - return load_mesh_parallel(mesh_file; n_cells_max=n_cells_max, RealT=RealT) - else - mesh_file = get_restart_mesh_filename(restart_file, False()) - load_mesh_serial(mesh_file; n_cells_max=n_cells_max, RealT=RealT) - end +function load_mesh(restart_file::AbstractString; n_cells_max = 0, RealT = Float64) + if mpi_isparallel() + mesh_file = get_restart_mesh_filename(restart_file, True()) + return load_mesh_parallel(mesh_file; n_cells_max = n_cells_max, RealT = RealT) + else + mesh_file = get_restart_mesh_filename(restart_file, False()) + load_mesh_serial(mesh_file; n_cells_max = n_cells_max, RealT = RealT) + end end function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT) - ndims, mesh_type = h5open(mesh_file, "r") do file - return read(attributes(file)["ndims"]), - read(attributes(file)["mesh_type"]) - end - - if mesh_type == "TreeMesh" - n_cells = h5open(mesh_file, "r") do file - return read(attributes(file)["n_cells"]) - end - mesh = TreeMesh(SerialTree{ndims}, max(n_cells, n_cells_max)) - load_mesh!(mesh, mesh_file) - elseif mesh_type == "StructuredMesh" - size_, mapping_as_string = h5open(mesh_file, "r") do file - return read(attributes(file)["size"]), - read(attributes(file)["mapping"]) - end - - size = Tuple(size_) - - # TODO: `@eval` is evil - # A temporary workaround to evaluate the code that defines the domain mapping in a local scope. - # This prevents errors when multiple restart elixirs are executed in one session, where one - # defines `mapping` as a variable, while the other defines it as a function. - # - # This should be replaced with something more robust and secure, - # see https://github.com/trixi-framework/Trixi.jl/issues/541). 
- expr = Meta.parse(mapping_as_string) - if expr.head == :toplevel - expr.head = :block - end - - if ndims == 1 - mapping = @eval function(xi) - $expr - mapping(xi) - end - elseif ndims == 2 - mapping = @eval function(xi, eta) - $expr - mapping(xi, eta) - end - else # ndims == 3 - mapping = @eval function(xi, eta, zeta) - $expr - mapping(xi, eta, zeta) - end + ndims, mesh_type = h5open(mesh_file, "r") do file + return read(attributes(file)["ndims"]), + read(attributes(file)["mesh_type"]) end - mesh = StructuredMesh(size, mapping; RealT=RealT, unsaved_changes=false, - mapping_as_string=mapping_as_string) - elseif mesh_type == "UnstructuredMesh2D" - mesh_filename, periodicity_ = h5open(mesh_file, "r") do file - return read(attributes(file)["mesh_filename"]), - read(attributes(file)["periodicity"]) - end - mesh = UnstructuredMesh2D(mesh_filename; RealT=RealT, periodicity=periodicity_, - unsaved_changes=false) - elseif mesh_type == "P4estMesh" - p4est_filename, tree_node_coordinates, + if mesh_type == "TreeMesh" + n_cells = h5open(mesh_file, "r") do file + return read(attributes(file)["n_cells"]) + end + mesh = TreeMesh(SerialTree{ndims}, max(n_cells, n_cells_max)) + load_mesh!(mesh, mesh_file) + elseif mesh_type == "StructuredMesh" + size_, mapping_as_string = h5open(mesh_file, "r") do file + return read(attributes(file)["size"]), + read(attributes(file)["mapping"]) + end + + size = Tuple(size_) + + # TODO: `@eval` is evil + # A temporary workaround to evaluate the code that defines the domain mapping in a local scope. + # This prevents errors when multiple restart elixirs are executed in one session, where one + # defines `mapping` as a variable, while the other defines it as a function. + # + # This should be replaced with something more robust and secure, + # see https://github.com/trixi-framework/Trixi.jl/issues/541). 
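# For illustration (hypothetical contents): `mapping_as_string` could hold,
# e.g.,
#
#     mapping(xi, eta) = SVector(xi, eta)
#
# `Meta.parse` turns this into an expression, which is spliced into a fresh
# wrapper function via `@eval` below, so the restart does not rely on `mapping`
# still being defined in the current session.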
+ expr = Meta.parse(mapping_as_string) + if expr.head == :toplevel + expr.head = :block + end + + if ndims == 1 + mapping = @eval function (xi) + $expr + mapping(xi) + end + elseif ndims == 2 + mapping = @eval function (xi, eta) + $expr + mapping(xi, eta) + end + else # ndims == 3 + mapping = @eval function (xi, eta, zeta) + $expr + mapping(xi, eta, zeta) + end + end + + mesh = StructuredMesh(size, mapping; RealT = RealT, unsaved_changes = false, + mapping_as_string = mapping_as_string) + elseif mesh_type == "UnstructuredMesh2D" + mesh_filename, periodicity_ = h5open(mesh_file, "r") do file + return read(attributes(file)["mesh_filename"]), + read(attributes(file)["periodicity"]) + end + mesh = UnstructuredMesh2D(mesh_filename; RealT = RealT, + periodicity = periodicity_, + unsaved_changes = false) + elseif mesh_type == "P4estMesh" + p4est_filename, tree_node_coordinates, nodes, boundary_names_ = h5open(mesh_file, "r") do file - return read(attributes(file)["p4est_file"]), - read(file["tree_node_coordinates"]), - read(file["nodes"]), - read(file["boundary_names"]) - end + return read(attributes(file)["p4est_file"]), + read(file["tree_node_coordinates"]), + read(file["nodes"]), + read(file["boundary_names"]) + end - boundary_names = boundary_names_ .|> Symbol + boundary_names = boundary_names_ .|> Symbol - p4est_file = joinpath(dirname(mesh_file), p4est_filename) - # Prevent Julia crashes when `p4est` can't find the file - @assert isfile(p4est_file) + p4est_file = joinpath(dirname(mesh_file), p4est_filename) + # Prevent Julia crashes when `p4est` can't find the file + @assert isfile(p4est_file) - p4est = load_p4est(p4est_file, Val(ndims)) + p4est = load_p4est(p4est_file, Val(ndims)) - mesh = P4estMesh{ndims}(p4est, tree_node_coordinates, - nodes, boundary_names, "", false, true) - else - error("Unknown mesh type!") - end + mesh = P4estMesh{ndims}(p4est, tree_node_coordinates, + nodes, boundary_names, "", false, true) + else + error("Unknown mesh type!") + end - return mesh + return mesh end function load_mesh!(mesh::SerialTreeMesh, mesh_file::AbstractString) - mesh.current_filename = mesh_file - mesh.unsaved_changes = false - - # Read mesh file - h5open(mesh_file, "r") do file - # Set domain information - mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) - mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) - mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) - - # Set length - n_cells = read(attributes(file)["n_cells"]) - resize!(mesh.tree, n_cells) - - # Read in data - mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) - mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) - mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) - mesh.tree.levels[1:n_cells] = read(file["levels"]) - mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) - end - - return mesh -end + mesh.current_filename = mesh_file + mesh.unsaved_changes = false + # Read mesh file + h5open(mesh_file, "r") do file + # Set domain information + mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) + mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) + mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) + + # Set length + n_cells = read(attributes(file)["n_cells"]) + resize!(mesh.tree, n_cells) + + # Read in data + mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) + mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) + mesh.tree.neighbor_ids[:, 1:n_cells] = 
read(file["neighbor_ids"]) + mesh.tree.levels[1:n_cells] = read(file["levels"]) + mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) + end + + return mesh +end function load_mesh_parallel(mesh_file::AbstractString; n_cells_max, RealT) - if mpi_isroot() - ndims_, mesh_type = h5open(mesh_file, "r") do file - return read(attributes(file)["ndims"]), - read(attributes(file)["mesh_type"]) - end - MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) - MPI.bcast(mesh_type, mpi_root(), mpi_comm()) - else - ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] - mesh_type = MPI.bcast(nothing, mpi_root(), mpi_comm()) - end - - if mesh_type == "TreeMesh" if mpi_isroot() - n_cells = h5open(mesh_file, "r") do file - read(attributes(file)["n_cells"]) - end - MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) - MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + ndims_, mesh_type = h5open(mesh_file, "r") do file + return read(attributes(file)["ndims"]), + read(attributes(file)["mesh_type"]) + end + MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) + MPI.bcast(mesh_type, mpi_root(), mpi_comm()) else - ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] - n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + mesh_type = MPI.bcast(nothing, mpi_root(), mpi_comm()) end - mesh = TreeMesh(ParallelTree{ndims_}, max(n_cells, n_cells_max)) - load_mesh!(mesh, mesh_file) - elseif mesh_type == "P4estMesh" - if mpi_isroot() - p4est_filename, tree_node_coordinates, - nodes, boundary_names_ = h5open(mesh_file, "r") do file - return read(attributes(file)["p4est_file"]), - read(file["tree_node_coordinates"]), - read(file["nodes"]), - read(file["boundary_names"]) - end - - boundary_names = boundary_names_ .|> Symbol - - p4est_file = joinpath(dirname(mesh_file), p4est_filename) - - data = (p4est_file, tree_node_coordinates, nodes, boundary_names) - MPI.bcast(data, mpi_root(), mpi_comm()) + if mesh_type == "TreeMesh" + if mpi_isroot() + n_cells = h5open(mesh_file, "r") do file + read(attributes(file)["n_cells"]) + end + MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) + MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + else + ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + end + + mesh = TreeMesh(ParallelTree{ndims_}, max(n_cells, n_cells_max)) + load_mesh!(mesh, mesh_file) + elseif mesh_type == "P4estMesh" + if mpi_isroot() + p4est_filename, tree_node_coordinates, + nodes, boundary_names_ = h5open(mesh_file, "r") do file + return read(attributes(file)["p4est_file"]), + read(file["tree_node_coordinates"]), + read(file["nodes"]), + read(file["boundary_names"]) + end + + boundary_names = boundary_names_ .|> Symbol + + p4est_file = joinpath(dirname(mesh_file), p4est_filename) + + data = (p4est_file, tree_node_coordinates, nodes, boundary_names) + MPI.bcast(data, mpi_root(), mpi_comm()) + else + data = MPI.bcast(nothing, mpi_root(), mpi_comm()) + p4est_file, tree_node_coordinates, nodes, boundary_names = data + end + + # Prevent Julia crashes when `p4est` can't find the file + @assert isfile(p4est_file) + + p4est = load_p4est(p4est_file, Val(ndims_)) + + mesh = P4estMesh{ndims_}(p4est, tree_node_coordinates, + nodes, boundary_names, "", false, true) else - data = MPI.bcast(nothing, mpi_root(), mpi_comm()) - p4est_file, tree_node_coordinates, nodes, boundary_names = data + error("Unknown mesh type!") end - # Prevent Julia crashes when `p4est` can't find the file - @assert isfile(p4est_file) - - p4est = 
load_p4est(p4est_file, Val(ndims_)) - - mesh = P4estMesh{ndims_}(p4est, tree_node_coordinates, - nodes, boundary_names, "", false, true) - else - error("Unknown mesh type!") - end - - return mesh + return mesh end function load_mesh!(mesh::ParallelTreeMesh, mesh_file::AbstractString) - mesh.current_filename = mesh_file - mesh.unsaved_changes = false + mesh.current_filename = mesh_file + mesh.unsaved_changes = false - if mpi_isroot() - h5open(mesh_file, "r") do file - # Set domain information - mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) - mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) - mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) - MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) - MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm()) - MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm()) - - # Set length - n_cells = read(attributes(file)["n_cells"]) - MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) - resize!(mesh.tree, n_cells) - - # Read in data - mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) - mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) - mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) - mesh.tree.levels[1:n_cells] = read(file["levels"]) - mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) - @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) + if mpi_isroot() + h5open(mesh_file, "r") do file + # Set domain information + mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) + mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) + mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) + MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm()) + + # Set length + n_cells = read(attributes(file)["n_cells"]) + MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + resize!(mesh.tree, n_cells) + + # Read in data + mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) + mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) + mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) + mesh.tree.levels[1:n_cells] = read(file["levels"]) + mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), + mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), + mpi_comm()) + end + else # non-root ranks + # Set domain information + mesh.tree.center_level_0 = MPI.Bcast!(collect(mesh.tree.center_level_0), + mpi_root(), mpi_comm()) + mesh.tree.length_level_0 = MPI.Bcast!(collect(mesh.tree.length_level_0), + mpi_root(), mpi_comm())[1] + mesh.tree.periodicity = Tuple(MPI.Bcast!(collect(mesh.tree.periodicity), + mpi_root(), mpi_comm())) + + # Set length + n_cells = 
MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + resize!(mesh.tree, n_cells) + + # Read in data + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) end - else # non-root ranks - # Set domain information - mesh.tree.center_level_0 = MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) - mesh.tree.length_level_0 = MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm())[1] - mesh.tree.periodicity = Tuple(MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm())) - - # Set length - n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] - resize!(mesh.tree, n_cells) - - # Read in data - @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) - end - - # Partition mesh - partition!(mesh) - - return mesh -end + # Partition mesh + partition!(mesh) + return mesh +end end # @muladd diff --git a/src/meshes/meshes.jl b/src/meshes/meshes.jl index a6dcbe132d8..2716aa2007b 100644 --- a/src/meshes/meshes.jl +++ b/src/meshes/meshes.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent include("tree_mesh.jl") include("structured_mesh.jl") @@ -14,6 +14,4 @@ include("transfinite_mappings_3d.jl") include("p4est_mesh.jl") include("mesh_io.jl") include("dgmulti_meshes.jl") - - end # @muladd diff --git a/src/meshes/p4est_mesh.jl b/src/meshes/p4est_mesh.jl index 2a9777f2a11..ddd6cf473e4 100644 --- a/src/meshes/p4est_mesh.jl +++ b/src/meshes/p4est_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ P4estMesh{NDIMS} <: AbstractMesh{NDIMS} @@ -11,100 +11,107 @@ An unstructured curved mesh based on trees that uses the C library `p4est` to manage trees and mesh refinement. """ -mutable struct P4estMesh{NDIMS, RealT<:Real, IsParallel, P, Ghost, NDIMSP2, NNODES} <: AbstractMesh{NDIMS} - p4est ::P # Either Ptr{p4est_t} or Ptr{p8est_t} - is_parallel ::IsParallel - ghost ::Ghost # Either Ptr{p4est_ghost_t} or Ptr{p8est_ghost_t} - # Coordinates at the nodes specified by the tensor product of `nodes` (NDIMS times). - # This specifies the geometry interpolation for each tree. 
- tree_node_coordinates ::Array{RealT, NDIMSP2} # [dimension, i, j, k, tree] - nodes ::SVector{NNODES, RealT} - boundary_names ::Array{Symbol, 2} # [face direction, tree] - current_filename ::String - unsaved_changes ::Bool - p4est_partition_allow_for_coarsening::Bool - - function P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, boundary_names, - current_filename, unsaved_changes, p4est_partition_allow_for_coarsening) where NDIMS - if NDIMS == 2 - @assert p4est isa Ptr{p4est_t} - elseif NDIMS == 3 - @assert p4est isa Ptr{p8est_t} - end +mutable struct P4estMesh{NDIMS, RealT <: Real, IsParallel, P, Ghost, NDIMSP2, NNODES} <: + AbstractMesh{NDIMS} + p4est::P # Either Ptr{p4est_t} or Ptr{p8est_t} + is_parallel::IsParallel + ghost::Ghost # Either Ptr{p4est_ghost_t} or Ptr{p8est_ghost_t} + # Coordinates at the nodes specified by the tensor product of `nodes` (NDIMS times). + # This specifies the geometry interpolation for each tree. + tree_node_coordinates::Array{RealT, NDIMSP2} # [dimension, i, j, k, tree] + nodes::SVector{NNODES, RealT} + boundary_names::Array{Symbol, 2} # [face direction, tree] + current_filename::String + unsaved_changes::Bool + p4est_partition_allow_for_coarsening::Bool + + function P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, boundary_names, + current_filename, unsaved_changes, + p4est_partition_allow_for_coarsening) where {NDIMS} + if NDIMS == 2 + @assert p4est isa Ptr{p4est_t} + elseif NDIMS == 3 + @assert p4est isa Ptr{p8est_t} + end - if mpi_isparallel() - if !P4est.uses_mpi() - error("p4est library does not support MPI") - end - is_parallel = True() - else - is_parallel = False() - end + if mpi_isparallel() + if !P4est.uses_mpi() + error("p4est library does not support MPI") + end + is_parallel = True() + else + is_parallel = False() + end - ghost = ghost_new_p4est(p4est) + ghost = ghost_new_p4est(p4est) - mesh = new{NDIMS, eltype(tree_node_coordinates), typeof(is_parallel), typeof(p4est), typeof(ghost), NDIMS+2, length(nodes)}( - p4est, is_parallel, ghost, tree_node_coordinates, nodes, boundary_names, current_filename, unsaved_changes, - p4est_partition_allow_for_coarsening) + mesh = new{NDIMS, eltype(tree_node_coordinates), typeof(is_parallel), + typeof(p4est), typeof(ghost), NDIMS + 2, length(nodes)}(p4est, + is_parallel, + ghost, + tree_node_coordinates, + nodes, + boundary_names, + current_filename, + unsaved_changes, + p4est_partition_allow_for_coarsening) - # Destroy `p4est` structs when the mesh is garbage collected - finalizer(destroy_mesh, mesh) + # Destroy `p4est` structs when the mesh is garbage collected + finalizer(destroy_mesh, mesh) - return mesh - end + return mesh + end end -const SerialP4estMesh{NDIMS} = P4estMesh{NDIMS, <:Real, <:False} +const SerialP4estMesh{NDIMS} = P4estMesh{NDIMS, <:Real, <:False} const ParallelP4estMesh{NDIMS} = P4estMesh{NDIMS, <:Real, <:True} @inline mpi_parallel(mesh::SerialP4estMesh) = False() @inline mpi_parallel(mesh::ParallelP4estMesh) = True() - function destroy_mesh(mesh::P4estMesh{2}) - connectivity = unsafe_load(mesh.p4est).connectivity - p4est_ghost_destroy(mesh.ghost) - p4est_destroy(mesh.p4est) - p4est_connectivity_destroy(connectivity) + connectivity = unsafe_load(mesh.p4est).connectivity + p4est_ghost_destroy(mesh.ghost) + p4est_destroy(mesh.p4est) + p4est_connectivity_destroy(connectivity) end function destroy_mesh(mesh::P4estMesh{3}) - connectivity = unsafe_load(mesh.p4est).connectivity - p8est_ghost_destroy(mesh.ghost) - p8est_destroy(mesh.p4est) - p8est_connectivity_destroy(connectivity) + 
connectivity = unsafe_load(mesh.p4est).connectivity + p8est_ghost_destroy(mesh.ghost) + p8est_destroy(mesh.p4est) + p8est_connectivity_destroy(connectivity) end - -@inline Base.ndims(::P4estMesh{NDIMS}) where NDIMS = NDIMS +@inline Base.ndims(::P4estMesh{NDIMS}) where {NDIMS} = NDIMS @inline Base.real(::P4estMesh{NDIMS, RealT}) where {NDIMS, RealT} = RealT @inline function ntrees(mesh::P4estMesh) - trees = unsafe_load(mesh.p4est).trees - return unsafe_load(trees).elem_count + trees = unsafe_load(mesh.p4est).trees + return unsafe_load(trees).elem_count end # returns Int32 by default which causes a weird method error when creating the cache @inline ncells(mesh::P4estMesh) = Int(unsafe_load(mesh.p4est).local_num_quadrants) - function Base.show(io::IO, mesh::P4estMesh) - print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}") + print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}") end function Base.show(io::IO, ::MIME"text/plain", mesh::P4estMesh) - if get(io, :compact, false) - show(io, mesh) - else - setup = [ - "#trees" => ntrees(mesh), - "current #cells" => ncells(mesh), - "polydeg" => length(mesh.nodes) - 1, - ] - summary_box(io, "P4estMesh{" * string(ndims(mesh)) * ", " * string(real(mesh)) * "}", setup) - end + if get(io, :compact, false) + show(io, mesh) + else + setup = [ + "#trees" => ntrees(mesh), + "current #cells" => ncells(mesh), + "polydeg" => length(mesh.nodes) - 1, + ] + summary_box(io, + "P4estMesh{" * string(ndims(mesh)) * ", " * string(real(mesh)) * + "}", setup) + end end - """ P4estMesh(trees_per_dimension; polydeg, mapping=nothing, faces=nothing, coordinates_min=nothing, coordinates_max=nothing, @@ -153,128 +160,129 @@ Non-periodic boundaries will be called `:x_neg`, `:x_pos`, `:y_neg`, `:y_pos`, ` to permit more fine-grained partitioning. 
""" function P4estMesh(trees_per_dimension; polydeg, - mapping=nothing, faces=nothing, coordinates_min=nothing, coordinates_max=nothing, - RealT=Float64, initial_refinement_level=0, periodicity=true, unsaved_changes=true, - p4est_partition_allow_for_coarsening=true) - - @assert ( - (coordinates_min === nothing) === (coordinates_max === nothing) - ) "Either both or none of coordinates_min and coordinates_max must be specified" - - @assert count(i -> i !== nothing, - (mapping, faces, coordinates_min) - ) == 1 "Exactly one of mapping, faces and coordinates_min/max must be specified" - - # Extract mapping - if faces !== nothing - validate_faces(faces) - mapping = transfinite_mapping(faces) - elseif coordinates_min !== nothing - mapping = coordinates2mapping(coordinates_min, coordinates_max) - end - - NDIMS = length(trees_per_dimension) - - # Convert periodicity to a Tuple of a Bool for every dimension - if all(periodicity) - # Also catches case where periodicity = true - periodicity = ntuple(_->true, NDIMS) - elseif !any(periodicity) - # Also catches case where periodicity = false - periodicity = ntuple(_->false, NDIMS) - else - # Default case if periodicity is an iterable - periodicity = Tuple(periodicity) - end - - basis = LobattoLegendreBasis(RealT, polydeg) - nodes = basis.nodes - tree_node_coordinates = Array{RealT, NDIMS+2}(undef, NDIMS, - ntuple(_ -> length(nodes), NDIMS)..., - prod(trees_per_dimension)) - calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, trees_per_dimension) - - # p4est_connectivity_new_brick has trees in Z-order, so use our own function for this - connectivity = connectivity_structured(trees_per_dimension..., periodicity) - - p4est = new_p4est(connectivity, initial_refinement_level) - - # Non-periodic boundaries - boundary_names = fill(Symbol("---"), 2 * NDIMS, prod(trees_per_dimension)) - - structured_boundary_names!(boundary_names, trees_per_dimension, periodicity) - - return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, - boundary_names, "", unsaved_changes, - p4est_partition_allow_for_coarsening) -end + mapping = nothing, faces = nothing, coordinates_min = nothing, + coordinates_max = nothing, + RealT = Float64, initial_refinement_level = 0, periodicity = true, + unsaved_changes = true, + p4est_partition_allow_for_coarsening = true) + @assert ((coordinates_min === nothing)===(coordinates_max === nothing)) "Either both or none of coordinates_min and coordinates_max must be specified" + + @assert count(i -> i !== nothing, + (mapping, faces, coordinates_min))==1 "Exactly one of mapping, faces and coordinates_min/max must be specified" + + # Extract mapping + if faces !== nothing + validate_faces(faces) + mapping = transfinite_mapping(faces) + elseif coordinates_min !== nothing + mapping = coordinates2mapping(coordinates_min, coordinates_max) + end -# 2D version -function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{2}, periodicity) - linear_indices = LinearIndices(trees_per_dimension) + NDIMS = length(trees_per_dimension) + + # Convert periodicity to a Tuple of a Bool for every dimension + if all(periodicity) + # Also catches case where periodicity = true + periodicity = ntuple(_ -> true, NDIMS) + elseif !any(periodicity) + # Also catches case where periodicity = false + periodicity = ntuple(_ -> false, NDIMS) + else + # Default case if periodicity is an iterable + periodicity = Tuple(periodicity) + end + + basis = LobattoLegendreBasis(RealT, polydeg) + nodes = basis.nodes + tree_node_coordinates = Array{RealT, NDIMS 
+ 2}(undef, NDIMS, + ntuple(_ -> length(nodes), + NDIMS)..., + prod(trees_per_dimension)) + calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, + trees_per_dimension) + + # p4est_connectivity_new_brick has trees in Z-order, so use our own function for this + connectivity = connectivity_structured(trees_per_dimension..., periodicity) + + p4est = new_p4est(connectivity, initial_refinement_level) - # Boundaries in x-direction - if !periodicity[1] - for cell_y in 1:trees_per_dimension[2] - tree = linear_indices[1, cell_y] - boundary_names[1, tree] = :x_neg + # Non-periodic boundaries + boundary_names = fill(Symbol("---"), 2 * NDIMS, prod(trees_per_dimension)) - tree = linear_indices[end, cell_y] - boundary_names[2, tree] = :x_pos + structured_boundary_names!(boundary_names, trees_per_dimension, periodicity) + + return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, + boundary_names, "", unsaved_changes, + p4est_partition_allow_for_coarsening) +end + +# 2D version +function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{2}, + periodicity) + linear_indices = LinearIndices(trees_per_dimension) + + # Boundaries in x-direction + if !periodicity[1] + for cell_y in 1:trees_per_dimension[2] + tree = linear_indices[1, cell_y] + boundary_names[1, tree] = :x_neg + + tree = linear_indices[end, cell_y] + boundary_names[2, tree] = :x_pos + end end - end - # Boundaries in y-direction - if !periodicity[2] - for cell_x in 1:trees_per_dimension[1] - tree = linear_indices[cell_x, 1] - boundary_names[3, tree] = :y_neg + # Boundaries in y-direction + if !periodicity[2] + for cell_x in 1:trees_per_dimension[1] + tree = linear_indices[cell_x, 1] + boundary_names[3, tree] = :y_neg - tree = linear_indices[cell_x, end] - boundary_names[4, tree] = :y_pos + tree = linear_indices[cell_x, end] + boundary_names[4, tree] = :y_pos + end end - end end # 3D version -function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{3}, periodicity) - linear_indices = LinearIndices(trees_per_dimension) - - # Boundaries in x-direction - if !periodicity[1] - for cell_z in 1:trees_per_dimension[3], cell_y in 1:trees_per_dimension[2] - tree = linear_indices[1, cell_y, cell_z] - boundary_names[1, tree] = :x_neg - - tree = linear_indices[end, cell_y, cell_z] - boundary_names[2, tree] = :x_pos +function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{3}, + periodicity) + linear_indices = LinearIndices(trees_per_dimension) + + # Boundaries in x-direction + if !periodicity[1] + for cell_z in 1:trees_per_dimension[3], cell_y in 1:trees_per_dimension[2] + tree = linear_indices[1, cell_y, cell_z] + boundary_names[1, tree] = :x_neg + + tree = linear_indices[end, cell_y, cell_z] + boundary_names[2, tree] = :x_pos + end end - end - # Boundaries in y-direction - if !periodicity[2] - for cell_z in 1:trees_per_dimension[3], cell_x in 1:trees_per_dimension[1] - tree = linear_indices[cell_x, 1, cell_z] - boundary_names[3, tree] = :y_neg + # Boundaries in y-direction + if !periodicity[2] + for cell_z in 1:trees_per_dimension[3], cell_x in 1:trees_per_dimension[1] + tree = linear_indices[cell_x, 1, cell_z] + boundary_names[3, tree] = :y_neg - tree = linear_indices[cell_x, end, cell_z] - boundary_names[4, tree] = :y_pos + tree = linear_indices[cell_x, end, cell_z] + boundary_names[4, tree] = :y_pos + end end - end - # Boundaries in z-direction - if !periodicity[3] - for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] - tree = linear_indices[cell_x, 
cell_y, 1] - boundary_names[5, tree] = :z_neg + # Boundaries in z-direction + if !periodicity[3] + for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] + tree = linear_indices[cell_x, cell_y, 1] + boundary_names[5, tree] = :z_neg - tree = linear_indices[cell_x, cell_y, end] - boundary_names[6, tree] = :z_pos + tree = linear_indices[cell_x, cell_y, end] + boundary_names[6, tree] = :z_pos + end end - end end - """ P4estMesh{NDIMS}(meshfile::String; mapping=nothing, polydeg=1, RealT=Float64, @@ -337,125 +345,134 @@ For example, if a two-dimensional base mesh contains 25 elements then setting to permit more fine-grained partitioning. """ function P4estMesh{NDIMS}(meshfile::String; - mapping=nothing, polydeg=1, RealT=Float64, - initial_refinement_level=0, unsaved_changes=true, - p4est_partition_allow_for_coarsening=true) where NDIMS - # Prevent `p4est` from crashing Julia if the file doesn't exist - @assert isfile(meshfile) - - # Read in the Header of the meshfile to determine which constructor is appropriate - header = open(meshfile, "r") do io - readline(io) # *Header of the Abaqus file; discarded - readline(io) # Readin the actual header information - end - - # Check if the meshfile was generated using HOHQMesh - if header == " File created by HOHQMesh" - # Mesh curvature and boundary naming is handled with additional information available in meshfile - p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level, - NDIMS, RealT) - else - # Mesh curvature is handled directly by applying the mapping keyword argument - p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg, - initial_refinement_level, - NDIMS, RealT) - end - - return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, - boundary_names, "", unsaved_changes, - p4est_partition_allow_for_coarsening) -end + mapping = nothing, polydeg = 1, RealT = Float64, + initial_refinement_level = 0, unsaved_changes = true, + p4est_partition_allow_for_coarsening = true) where {NDIMS} + # Prevent `p4est` from crashing Julia if the file doesn't exist + @assert isfile(meshfile) + + # Read in the header of the meshfile to determine which constructor is appropriate + header = open(meshfile, "r") do io + readline(io) # *Header of the Abaqus file; discarded + readline(io) # Read in the actual header information + end + # Check if the meshfile was generated using HOHQMesh + if header == " File created by HOHQMesh" + # Mesh curvature and boundary naming is handled with additional information available in meshfile + p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_hohqmesh_abaqus(meshfile, + initial_refinement_level, + NDIMS, + RealT) + else + # Mesh curvature is handled directly by applying the mapping keyword argument + p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_standard_abaqus(meshfile, + mapping, + polydeg, + initial_refinement_level, + NDIMS, + RealT) + end + + return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, + boundary_names, "", unsaved_changes, + p4est_partition_allow_for_coarsening) +end # Create the mesh connectivity, mapped node coordinates within each tree, reference nodes in [-1,1] # and a list of boundary names for the `P4estMesh`. High-order boundary curve information as well as # the boundary names on each tree are provided by the `meshfile` created by # [`HOHQMesh.jl`](https://github.com/trixi-framework/HOHQMesh.jl).
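# A minimal usage sketch of the meshfile constructor above, assuming a hypothetical
# 2D Abaqus file: the two-line header read above decides whether the HOHQMesh reader
# or the standard Abaqus reader builds the mesh.

using Trixi

mesh = P4estMesh{2}("out/mesh.inp"; polydeg = 3, initial_refinement_level = 1)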
-function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level, n_dimensions, RealT) - # Create the mesh connectivity using `p4est` - connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) - connectivity_obj = unsafe_load(connectivity) - - # These need to be of the type Int for unsafe_wrap below to work - n_trees::Int = connectivity_obj.num_trees - n_vertices::Int = connectivity_obj.num_vertices - - # Extract a copy of the element vertices to compute the tree node coordinates - vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) - - # Readin all the information from the mesh file into a string array - file_lines = readlines(open(meshfile)) +function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level, + n_dimensions, RealT) + # Create the mesh connectivity using `p4est` + connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) + connectivity_obj = unsafe_load(connectivity) - # Get the file index where the mesh polynomial degree is given in the meshfile - file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + # These need to be of the type Int for unsafe_wrap below to work + n_trees::Int = connectivity_obj.num_trees + n_vertices::Int = connectivity_obj.num_vertices - # Get the polynomial order of the mesh boundary information - current_line = split(file_lines[file_idx]) - mesh_polydeg = parse(Int, current_line[6]) - mesh_nnodes = mesh_polydeg + 1 + # Extract a copy of the element vertices to compute the tree node coordinates + vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) - # Create the Chebyshev-Gauss-Lobatto nodes used by HOHQMesh to represent the boundaries - cheby_nodes, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes) - nodes = SVector{mesh_nnodes}(cheby_nodes) + # Read in all the information from the mesh file into a string array + file_lines = readlines(open(meshfile)) - # Allocate the memory for the tree node coordinates - tree_node_coordinates = Array{RealT, n_dimensions+2}(undef, n_dimensions, - ntuple(_ -> length(nodes), n_dimensions)..., - n_trees) + # Get the file index where the mesh polynomial degree is given in the meshfile + file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) - # Compute the tree node coordinates and return the updated file index - file_idx = calc_tree_node_coordinates!(tree_node_coordinates, file_lines, nodes, vertices, RealT) - - # Allocate the memory for the boundary labels - boundary_names = Array{Symbol}(undef, (2 * n_dimensions, n_trees)) - - # Read in the boundary names from the last portion of the meshfile - # Note here the boundary names where "---" means an internal connection - for tree in 1:n_trees + # Get the polynomial order of the mesh boundary information current_line = split(file_lines[file_idx]) - boundary_names[:, tree] = map(Symbol, current_line[2:end]) - file_idx += 1 - end + mesh_polydeg = parse(Int, current_line[6]) + mesh_nnodes = mesh_polydeg + 1 + + # Create the Chebyshev-Gauss-Lobatto nodes used by HOHQMesh to represent the boundaries + cheby_nodes, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes) + nodes = SVector{mesh_nnodes}(cheby_nodes) + + # Allocate the memory for the tree node coordinates + tree_node_coordinates = Array{RealT, n_dimensions + 2}(undef, n_dimensions, + ntuple(_ -> length(nodes), + n_dimensions)..., + n_trees) + + # Compute the tree node coordinates and return the updated file index + file_idx = calc_tree_node_coordinates!(tree_node_coordinates, file_lines, nodes, + vertices,
RealT) + + # Allocate the memory for the boundary labels + boundary_names = Array{Symbol}(undef, (2 * n_dimensions, n_trees)) + + # Read in the boundary names from the last portion of the meshfile + # Note that a boundary name of "---" means an internal connection + for tree in 1:n_trees + current_line = split(file_lines[file_idx]) + boundary_names[:, tree] = map(Symbol, current_line[2:end]) + file_idx += 1 + end - p4est = new_p4est(connectivity, initial_refinement_level) + p4est = new_p4est(connectivity, initial_refinement_level) - return p4est, tree_node_coordinates, nodes, boundary_names + return p4est, tree_node_coordinates, nodes, boundary_names end - # Create the mesh connectivity, mapped node coordinates within each tree, reference nodes in [-1,1] # and a list of boundary names for the `P4estMesh`. The tree node coordinates are computed according to # the `mapping` passed to this function using polynomial interpolants of degree `polydeg`. All boundary # names are given the name `:all`. -function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg, initial_refinement_level, n_dimensions, RealT) - # Create the mesh connectivity using `p4est` - connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) - connectivity_obj = unsafe_load(connectivity) +function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg, + initial_refinement_level, n_dimensions, RealT) + # Create the mesh connectivity using `p4est` + connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) + connectivity_obj = unsafe_load(connectivity) - # These need to be of the type Int for unsafe_wrap below to work - n_trees::Int = connectivity_obj.num_trees - n_vertices::Int = connectivity_obj.num_vertices + # These need to be of the type Int for unsafe_wrap below to work + n_trees::Int = connectivity_obj.num_trees + n_vertices::Int = connectivity_obj.num_vertices - vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) - tree_to_vertex = unsafe_wrap(Array, connectivity_obj.tree_to_vertex, (2^n_dimensions, n_trees)) + vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) + tree_to_vertex = unsafe_wrap(Array, connectivity_obj.tree_to_vertex, + (2^n_dimensions, n_trees)) - basis = LobattoLegendreBasis(RealT, polydeg) - nodes = basis.nodes + basis = LobattoLegendreBasis(RealT, polydeg) + nodes = basis.nodes - tree_node_coordinates = Array{RealT, n_dimensions+2}(undef, n_dimensions, - ntuple(_ -> length(nodes), n_dimensions)..., - n_trees) - calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, vertices, tree_to_vertex) + tree_node_coordinates = Array{RealT, n_dimensions + 2}(undef, n_dimensions, + ntuple(_ -> length(nodes), + n_dimensions)..., + n_trees) + calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, vertices, + tree_to_vertex) - p4est = new_p4est(connectivity, initial_refinement_level) + p4est = new_p4est(connectivity, initial_refinement_level) - # There's no simple and generic way to distinguish boundaries. Name all of them :all.
+ boundary_names = fill(:all, 2 * n_dimensions, n_trees) - return p4est, tree_node_coordinates, nodes, boundary_names + return p4est, tree_node_coordinates, nodes, boundary_names end - """ P4estMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness; polydeg, RealT=Float64, @@ -485,545 +502,562 @@ The mesh will have two boundaries, `:inside` and `:outside`. to permit more fine-grained partitioning. """ function P4estMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness; - polydeg, RealT=Float64, - initial_refinement_level=0, unsaved_changes=true, - p4est_partition_allow_for_coarsening=true) - connectivity = connectivity_cubed_sphere(trees_per_face_dimension, layers) + polydeg, RealT = Float64, + initial_refinement_level = 0, unsaved_changes = true, + p4est_partition_allow_for_coarsening = true) + connectivity = connectivity_cubed_sphere(trees_per_face_dimension, layers) - n_trees = 6 * trees_per_face_dimension^2 * layers + n_trees = 6 * trees_per_face_dimension^2 * layers - basis = LobattoLegendreBasis(RealT, polydeg) - nodes = basis.nodes + basis = LobattoLegendreBasis(RealT, polydeg) + nodes = basis.nodes - tree_node_coordinates = Array{RealT, 5}(undef, 3, - ntuple(_ -> length(nodes), 3)..., - n_trees) - calc_tree_node_coordinates!(tree_node_coordinates, nodes, trees_per_face_dimension, layers, - inner_radius, thickness) + tree_node_coordinates = Array{RealT, 5}(undef, 3, + ntuple(_ -> length(nodes), 3)..., + n_trees) + calc_tree_node_coordinates!(tree_node_coordinates, nodes, trees_per_face_dimension, + layers, + inner_radius, thickness) - p4est = new_p4est(connectivity, initial_refinement_level) + p4est = new_p4est(connectivity, initial_refinement_level) - boundary_names = fill(Symbol("---"), 2 * 3, n_trees) - boundary_names[5, :] .= Symbol("inside") - boundary_names[6, :] .= Symbol("outside") + boundary_names = fill(Symbol("---"), 2 * 3, n_trees) + boundary_names[5, :] .= Symbol("inside") + boundary_names[6, :] .= Symbol("outside") - return P4estMesh{3}(p4est, tree_node_coordinates, nodes, - boundary_names, "", unsaved_changes, - p4est_partition_allow_for_coarsening) + return P4estMesh{3}(p4est, tree_node_coordinates, nodes, + boundary_names, "", unsaved_changes, + p4est_partition_allow_for_coarsening) end - # Create a new p4est_connectivity that represents a structured rectangle. # Similar to p4est_connectivity_new_brick, but doesn't use Morton order. # This order makes `calc_tree_node_coordinates!` below and the calculation # of `boundary_names` above easier but is irrelevant otherwise. # 2D version function connectivity_structured(n_cells_x, n_cells_y, periodicity) - linear_indices = LinearIndices((n_cells_x, n_cells_y)) - - # Vertices represent the coordinates of the forest. This is used by `p4est` - # to write VTK files. - # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. 
- n_vertices = 0 - n_trees = n_cells_x * n_cells_y - # No corner connectivity is needed - n_corners = 0 - vertices = C_NULL - tree_to_vertex = C_NULL - - tree_to_tree = Array{p4est_topidx_t, 2}(undef, 4, n_trees) - tree_to_face = Array{Int8, 2}(undef, 4, n_trees) - - for cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y] - - # Subtract 1 because `p4est` uses zero-based indexing - # Negative x-direction - if cell_x > 1 - tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y] - 1 - tree_to_face[1, tree] = 1 - elseif periodicity[1] - tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y] - 1 - tree_to_face[1, tree] = 1 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[1, tree] = tree - 1 - tree_to_face[1, tree] = 0 - end + linear_indices = LinearIndices((n_cells_x, n_cells_y)) + + # Vertices represent the coordinates of the forest. This is used by `p4est` + # to write VTK files. + # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. + n_vertices = 0 + n_trees = n_cells_x * n_cells_y + # No corner connectivity is needed + n_corners = 0 + vertices = C_NULL + tree_to_vertex = C_NULL + + tree_to_tree = Array{p4est_topidx_t, 2}(undef, 4, n_trees) + tree_to_face = Array{Int8, 2}(undef, 4, n_trees) + + for cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y] + + # Subtract 1 because `p4est` uses zero-based indexing + # Negative x-direction + if cell_x > 1 + tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y] - 1 + tree_to_face[1, tree] = 1 + elseif periodicity[1] + tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y] - 1 + tree_to_face[1, tree] = 1 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[1, tree] = tree - 1 + tree_to_face[1, tree] = 0 + end - # Positive x-direction - if cell_x < n_cells_x - tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y] - 1 - tree_to_face[2, tree] = 0 - elseif periodicity[1] - tree_to_tree[2, tree] = linear_indices[1, cell_y] - 1 - tree_to_face[2, tree] = 0 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[2, tree] = tree - 1 - tree_to_face[2, tree] = 1 - end + # Positive x-direction + if cell_x < n_cells_x + tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y] - 1 + tree_to_face[2, tree] = 0 + elseif periodicity[1] + tree_to_tree[2, tree] = linear_indices[1, cell_y] - 1 + tree_to_face[2, tree] = 0 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[2, tree] = tree - 1 + tree_to_face[2, tree] = 1 + end - # Negative y-direction - if cell_y > 1 - tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1] - 1 - tree_to_face[3, tree] = 3 - elseif periodicity[2] - tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y] - 1 - tree_to_face[3, tree] = 3 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[3, tree] = tree - 1 - tree_to_face[3, tree] = 2 - end + # Negative y-direction + if cell_y > 1 + tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1] - 1 + tree_to_face[3, tree] = 3 + elseif periodicity[2] + tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y] - 1 + tree_to_face[3, tree] = 3 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[3, tree] = tree - 1 + tree_to_face[3, tree] = 2 + end - # Positive y-direction - if cell_y 
< n_cells_y - tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1] - 1 - tree_to_face[4, tree] = 2 - elseif periodicity[2] - tree_to_tree[4, tree] = linear_indices[cell_x, 1] - 1 - tree_to_face[4, tree] = 2 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[4, tree] = tree - 1 - tree_to_face[4, tree] = 3 + # Positive y-direction + if cell_y < n_cells_y + tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1] - 1 + tree_to_face[4, tree] = 2 + elseif periodicity[2] + tree_to_tree[4, tree] = linear_indices[cell_x, 1] - 1 + tree_to_face[4, tree] = 2 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[4, tree] = tree - 1 + tree_to_face[4, tree] = 3 + end end - end - tree_to_corner = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need corner connectivity, so this is a trivial case. - ctt_offset = zeros(p4est_topidx_t, 1) + tree_to_corner = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need corner connectivity, so this is a trivial case. + ctt_offset = zeros(p4est_topidx_t, 1) - corner_to_tree = C_NULL - corner_to_corner = C_NULL + corner_to_tree = C_NULL + corner_to_corner = C_NULL - connectivity = p4est_connectivity_new_copy(n_vertices, n_trees, n_corners, - vertices, tree_to_vertex, - tree_to_tree, tree_to_face, - tree_to_corner, ctt_offset, - corner_to_tree, corner_to_corner) + connectivity = p4est_connectivity_new_copy(n_vertices, n_trees, n_corners, + vertices, tree_to_vertex, + tree_to_tree, tree_to_face, + tree_to_corner, ctt_offset, + corner_to_tree, corner_to_corner) - @assert p4est_connectivity_is_valid(connectivity) == 1 + @assert p4est_connectivity_is_valid(connectivity) == 1 - return connectivity + return connectivity end # 3D version function connectivity_structured(n_cells_x, n_cells_y, n_cells_z, periodicity) - linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z)) - - # Vertices represent the coordinates of the forest. This is used by `p4est` - # to write VTK files. - # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. - n_vertices = 0 - n_trees = n_cells_x * n_cells_y * n_cells_z - # No edge connectivity is needed - n_edges = 0 - # No corner connectivity is needed - n_corners = 0 - vertices = C_NULL - tree_to_vertex = C_NULL - - tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) - tree_to_face = Array{Int8, 2}(undef, 6, n_trees) - - for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y, cell_z] - - # Subtract 1 because `p4est` uses zero-based indexing - # Negative x-direction - if cell_x > 1 - tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z] - 1 - tree_to_face[1, tree] = 1 - elseif periodicity[1] - tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y, cell_z] - 1 - tree_to_face[1, tree] = 1 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[1, tree] = tree - 1 - tree_to_face[1, tree] = 0 - end + linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z)) + + # Vertices represent the coordinates of the forest. This is used by `p4est` + # to write VTK files. + # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. 
+ n_vertices = 0 + n_trees = n_cells_x * n_cells_y * n_cells_z + # No edge connectivity is needed + n_edges = 0 + # No corner connectivity is needed + n_corners = 0 + vertices = C_NULL + tree_to_vertex = C_NULL + + tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) + tree_to_face = Array{Int8, 2}(undef, 6, n_trees) - # Positive x-direction - if cell_x < n_cells_x - tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z] - 1 - tree_to_face[2, tree] = 0 - elseif periodicity[1] - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z] - 1 - tree_to_face[2, tree] = 0 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[2, tree] = tree - 1 - tree_to_face[2, tree] = 1 - end + for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y, cell_z] + + # Subtract 1 because `p4est` uses zero-based indexing + # Negative x-direction + if cell_x > 1 + tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z] - 1 + tree_to_face[1, tree] = 1 + elseif periodicity[1] + tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y, cell_z] - 1 + tree_to_face[1, tree] = 1 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[1, tree] = tree - 1 + tree_to_face[1, tree] = 0 + end - # Negative y-direction - if cell_y > 1 - tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z] - 1 - tree_to_face[3, tree] = 3 - elseif periodicity[2] - tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y, cell_z] - 1 - tree_to_face[3, tree] = 3 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[3, tree] = tree - 1 - tree_to_face[3, tree] = 2 - end + # Positive x-direction + if cell_x < n_cells_x + tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z] - 1 + tree_to_face[2, tree] = 0 + elseif periodicity[1] + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z] - 1 + tree_to_face[2, tree] = 0 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[2, tree] = tree - 1 + tree_to_face[2, tree] = 1 + end - # Positive y-direction - if cell_y < n_cells_y - tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1, cell_z] - 1 - tree_to_face[4, tree] = 2 - elseif periodicity[2] - tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z] - 1 - tree_to_face[4, tree] = 2 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[4, tree] = tree - 1 - tree_to_face[4, tree] = 3 - end + # Negative y-direction + if cell_y > 1 + tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z] - 1 + tree_to_face[3, tree] = 3 + elseif periodicity[2] + tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y, cell_z] - 1 + tree_to_face[3, tree] = 3 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[3, tree] = tree - 1 + tree_to_face[3, tree] = 2 + end - # Negative z-direction - if cell_z > 1 - tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1] - 1 - tree_to_face[5, tree] = 5 - elseif periodicity[3] - tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, n_cells_z] - 1 - tree_to_face[5, tree] = 5 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[5, tree] = tree - 1 - tree_to_face[5, tree] = 4 - end + # Positive y-direction + if cell_y < n_cells_y + tree_to_tree[4, tree] = 
linear_indices[cell_x, cell_y + 1, cell_z] - 1 + tree_to_face[4, tree] = 2 + elseif periodicity[2] + tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z] - 1 + tree_to_face[4, tree] = 2 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[4, tree] = tree - 1 + tree_to_face[4, tree] = 3 + end + + # Negative z-direction + if cell_z > 1 + tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1] - 1 + tree_to_face[5, tree] = 5 + elseif periodicity[3] + tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, n_cells_z] - 1 + tree_to_face[5, tree] = 5 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[5, tree] = tree - 1 + tree_to_face[5, tree] = 4 + end - # Positive z-direction - if cell_z < n_cells_z - tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1] - 1 - tree_to_face[6, tree] = 4 - elseif periodicity[3] - tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, 1] - 1 - tree_to_face[6, tree] = 4 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[6, tree] = tree - 1 - tree_to_face[6, tree] = 5 + # Positive z-direction + if cell_z < n_cells_z + tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1] - 1 + tree_to_face[6, tree] = 4 + elseif periodicity[3] + tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, 1] - 1 + tree_to_face[6, tree] = 4 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[6, tree] = tree - 1 + tree_to_face[6, tree] = 5 + end end - end - - tree_to_edge = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need edge connectivity, so this is a trivial case. - ett_offset = zeros(p4est_topidx_t, 1) - edge_to_tree = C_NULL - edge_to_edge = C_NULL - - tree_to_corner = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need corner connectivity, so this is a trivial case. - ctt_offset = zeros(p4est_topidx_t, 1) - - corner_to_tree = C_NULL - corner_to_corner = C_NULL - - connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, - vertices, tree_to_vertex, - tree_to_tree, tree_to_face, - tree_to_edge, ett_offset, - edge_to_tree, edge_to_edge, - tree_to_corner, ctt_offset, - corner_to_tree, corner_to_corner) - - @assert p8est_connectivity_is_valid(connectivity) == 1 - - return connectivity -end + tree_to_edge = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need edge connectivity, so this is a trivial case. + ett_offset = zeros(p4est_topidx_t, 1) + edge_to_tree = C_NULL + edge_to_edge = C_NULL + + tree_to_corner = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need corner connectivity, so this is a trivial case. 
+ ctt_offset = zeros(p4est_topidx_t, 1) + + corner_to_tree = C_NULL + corner_to_corner = C_NULL + + connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, + vertices, tree_to_vertex, + tree_to_tree, tree_to_face, + tree_to_edge, ett_offset, + edge_to_tree, edge_to_edge, + tree_to_corner, ctt_offset, + corner_to_tree, corner_to_corner) + + @assert p8est_connectivity_is_valid(connectivity) == 1 + + return connectivity +end function connectivity_cubed_sphere(trees_per_face_dimension, layers) - n_cells_x = n_cells_y = trees_per_face_dimension - n_cells_z = layers - - linear_indices = LinearIndices((trees_per_face_dimension, trees_per_face_dimension, layers, 6)) - - # Vertices represent the coordinates of the forest. This is used by `p4est` - # to write VTK files. - # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. - n_vertices = 0 - n_trees = 6 * n_cells_x * n_cells_y * n_cells_z - # No edge connectivity is needed - n_edges = 0 - # No corner connectivity is needed - n_corners = 0 - vertices = C_NULL - tree_to_vertex = C_NULL - - tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) - tree_to_face = Array{Int8, 2}(undef, 6, n_trees) - - # Illustration of the local coordinates of each face. ξ and η are the first - # local coordinates of each face. The third local coordinate ζ is always - # pointing outwards, which yields a right-handed coordinate system for each face. - # ┌────────────────────────────────────────────────────┐ - # ╱│ ╱│ - # ╱ │ ξ <───┐ ╱ │ - # ╱ │ ╱ ╱ │ - # ╱ │ 4 (+y) V ╱ │ - # ╱ │ η ╱ │ - # ╱ │ ╱ │ - # ╱ │ ╱ │ - # ╱ │ ╱ │ - # ╱ │ ╱ │ - # ╱ │ 5 (-z) η ╱ │ - # ╱ │ ↑ ╱ │ - # ╱ │ │ ╱ │ - # ╱ │ ξ <───┘ ╱ │ - # ┌────────────────────────────────────────────────────┐ 2 (+x) │ - # │ │ │ │ - # │ │ │ ξ │ - # │ │ │ ↑ │ - # │ 1 (-x) │ │ │ │ - # │ │ │ │ │ - # │ ╱│ │ │ ╱ │ - # │ V │ │ │ V │ - # │ η ↓ │ │ η │ - # │ ξ └──────────────────────────────────────│─────────────┘ - # │ ╱ η 6 (+z) │ ╱ - # │ ╱ ↑ │ ╱ - # │ ╱ │ │ ╱ - # │ ╱ └───> ξ │ ╱ - # │ ╱ │ ╱ - # │ ╱ │ ╱ Global coordinates: - # │ ╱ │ ╱ y - # │ ╱ ┌───> ξ │ ╱ ↑ - # │ ╱ ╱ │ ╱ │ - # │ ╱ V 3 (-y) │ ╱ │ - # │ ╱ η │ ╱ └─────> x - # │ ╱ │ ╱ ╱ - # │╱ │╱ V - # └────────────────────────────────────────────────────┘ z - for direction in 1:6 - for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y, cell_z, direction] - - # Subtract 1 because `p4est` uses zero-based indexing - # Negative x-direction - if cell_x > 1 # Connect to tree at the same face - tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z, direction] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 1 # This is the -x face - target = 4 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 2 # This is the +x face - target = 3 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 3 # This is the -y face - target = 1 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 4 # This is the +y face - target = 2 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 5 # This is the -z face - target = 2 - tree_to_tree[1, tree] = linear_indices[cell_y, 1, cell_z, target] - 1 - tree_to_face[1, tree] = 2 - else # direction == 6, this is the +z face - target = 1 - tree_to_tree[1, tree] = 
linear_indices[end - cell_y + 1, end, cell_z, target] - 1 - tree_to_face[1, tree] = 9 # first face dimensions are oppositely oriented, add 6 - end - - # Positive x-direction - if cell_x < n_cells_x # Connect to tree at the same face - tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z, direction] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 1 # This is the -x face - target = 3 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 2 # This is the +x face - target = 4 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 3 # This is the -y face - target = 2 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 4 # This is the +y face - target = 1 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 5 # This is the -z face - target = 1 - tree_to_tree[2, tree] = linear_indices[end - cell_y + 1, 1, cell_z, target] - 1 - tree_to_face[2, tree] = 8 # first face dimensions are oppositely oriented, add 6 - else # direction == 6, this is the +z face - target = 2 - tree_to_tree[2, tree] = linear_indices[cell_y, end, cell_z, target] - 1 - tree_to_face[2, tree] = 3 - end - - # Negative y-direction - if cell_y > 1 # Connect to tree at the same face - tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z, direction] - 1 - tree_to_face[3, tree] = 3 - elseif direction == 1 - target = 5 - tree_to_tree[3, tree] = linear_indices[end, end - cell_x + 1, cell_z, target] - 1 - tree_to_face[3, tree] = 7 # first face dimensions are oppositely oriented, add 6 - elseif direction == 2 - target = 5 - tree_to_tree[3, tree] = linear_indices[1, cell_x, cell_z, target] - 1 - tree_to_face[3, tree] = 0 - elseif direction == 3 - target = 5 - tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, target] - 1 - tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 - elseif direction == 4 - target = 5 - tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 - tree_to_face[3, tree] = 3 - elseif direction == 5 - target = 3 - tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, target] - 1 - tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 - else # direction == 6 - target = 3 - tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 - tree_to_face[3, tree] = 3 - end - - # Positive y-direction - if cell_y < n_cells_y # Connect to tree at the same face - tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1, cell_z, direction] - 1 - tree_to_face[4, tree] = 2 - elseif direction == 1 - target = 6 - tree_to_tree[4, tree] = linear_indices[1, end - cell_x + 1, cell_z, target] - 1 - tree_to_face[4, tree] = 6 # first face dimensions are oppositely oriented, add 6 - elseif direction == 2 - target = 6 - tree_to_tree[4, tree] = linear_indices[end, cell_x, cell_z, target] - 1 - tree_to_face[4, tree] = 1 - elseif direction == 3 - target = 6 - tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z, target] - 1 - tree_to_face[4, tree] = 2 - elseif direction == 4 - target = 6 - tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, target] - 1 - tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 - elseif direction == 5 - target = 4 - tree_to_tree[4, tree] = 
linear_indices[cell_x, 1, cell_z, target] - 1 - tree_to_face[4, tree] = 2 - else # direction == 6 - target = 4 - tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, target] - 1 - tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 - end - - # Negative z-direction - if cell_z > 1 - tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1, direction] - 1 - tree_to_face[5, tree] = 5 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[5, tree] = tree - 1 - tree_to_face[5, tree] = 4 - end - - # Positive z-direction - if cell_z < n_cells_z - tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1, direction] - 1 - tree_to_face[6, tree] = 4 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[6, tree] = tree - 1 - tree_to_face[6, tree] = 5 - end + n_cells_x = n_cells_y = trees_per_face_dimension + n_cells_z = layers + + linear_indices = LinearIndices((trees_per_face_dimension, trees_per_face_dimension, + layers, 6)) + + # Vertices represent the coordinates of the forest. This is used by `p4est` + # to write VTK files. + # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. + n_vertices = 0 + n_trees = 6 * n_cells_x * n_cells_y * n_cells_z + # No edge connectivity is needed + n_edges = 0 + # No corner connectivity is needed + n_corners = 0 + vertices = C_NULL + tree_to_vertex = C_NULL + + tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) + tree_to_face = Array{Int8, 2}(undef, 6, n_trees) + + # Illustration of the local coordinates of each face. ξ and η are the first + # local coordinates of each face. The third local coordinate ζ is always + # pointing outwards, which yields a right-handed coordinate system for each face. 
+ # ┌────────────────────────────────────────────────────┐ + # ╱│ ╱│ + # ╱ │ ξ <───┐ ╱ │ + # ╱ │ ╱ ╱ │ + # ╱ │ 4 (+y) V ╱ │ + # ╱ │ η ╱ │ + # ╱ │ ╱ │ + # ╱ │ ╱ │ + # ╱ │ ╱ │ + # ╱ │ ╱ │ + # ╱ │ 5 (-z) η ╱ │ + # ╱ │ ↑ ╱ │ + # ╱ │ │ ╱ │ + # ╱ │ ξ <───┘ ╱ │ + # ┌────────────────────────────────────────────────────┐ 2 (+x) │ + # │ │ │ │ + # │ │ │ ξ │ + # │ │ │ ↑ │ + # │ 1 (-x) │ │ │ │ + # │ │ │ │ │ + # │ ╱│ │ │ ╱ │ + # │ V │ │ │ V │ + # │ η ↓ │ │ η │ + # │ ξ └──────────────────────────────────────│─────────────┘ + # │ ╱ η 6 (+z) │ ╱ + # │ ╱ ↑ │ ╱ + # │ ╱ │ │ ╱ + # │ ╱ └───> ξ │ ╱ + # │ ╱ │ ╱ + # │ ╱ │ ╱ Global coordinates: + # │ ╱ │ ╱ y + # │ ╱ ┌───> ξ │ ╱ ↑ + # │ ╱ ╱ │ ╱ │ + # │ ╱ V 3 (-y) │ ╱ │ + # │ ╱ η │ ╱ └─────> x + # │ ╱ │ ╱ ╱ + # │╱ │╱ V + # └────────────────────────────────────────────────────┘ z + for direction in 1:6 + for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y, cell_z, direction] + + # Subtract 1 because `p4est` uses zero-based indexing + # Negative x-direction + if cell_x > 1 # Connect to tree at the same face + tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z, + direction] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 1 # This is the -x face + target = 4 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 2 # This is the +x face + target = 3 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 3 # This is the -y face + target = 1 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 4 # This is the +y face + target = 2 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 5 # This is the -z face + target = 2 + tree_to_tree[1, tree] = linear_indices[cell_y, 1, cell_z, target] - 1 + tree_to_face[1, tree] = 2 + else # direction == 6, this is the +z face + target = 1 + tree_to_tree[1, tree] = linear_indices[end - cell_y + 1, end, cell_z, + target] - 1 + tree_to_face[1, tree] = 9 # first face dimensions are oppositely oriented, add 6 + end + + # Positive x-direction + if cell_x < n_cells_x # Connect to tree at the same face + tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z, + direction] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 1 # This is the -x face + target = 3 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 2 # This is the +x face + target = 4 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 3 # This is the -y face + target = 2 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 4 # This is the +y face + target = 1 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 5 # This is the -z face + target = 1 + tree_to_tree[2, tree] = linear_indices[end - cell_y + 1, 1, cell_z, + target] - 1 + tree_to_face[2, tree] = 8 # first face dimensions are oppositely oriented, add 6 + else # direction == 6, this is the +z face + target = 2 + tree_to_tree[2, tree] = linear_indices[cell_y, end, cell_z, target] - 1 + tree_to_face[2, tree] = 3 + end + + # Negative y-direction + if cell_y > 1 # Connect to tree at the same 
face + tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z, + direction] - 1 + tree_to_face[3, tree] = 3 + elseif direction == 1 + target = 5 + tree_to_tree[3, tree] = linear_indices[end, end - cell_x + 1, cell_z, + target] - 1 + tree_to_face[3, tree] = 7 # first face dimensions are oppositely oriented, add 6 + elseif direction == 2 + target = 5 + tree_to_tree[3, tree] = linear_indices[1, cell_x, cell_z, target] - 1 + tree_to_face[3, tree] = 0 + elseif direction == 3 + target = 5 + tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, + target] - 1 + tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 + elseif direction == 4 + target = 5 + tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 + tree_to_face[3, tree] = 3 + elseif direction == 5 + target = 3 + tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, + target] - 1 + tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 + else # direction == 6 + target = 3 + tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 + tree_to_face[3, tree] = 3 + end + + # Positive y-direction + if cell_y < n_cells_y # Connect to tree at the same face + tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1, cell_z, + direction] - 1 + tree_to_face[4, tree] = 2 + elseif direction == 1 + target = 6 + tree_to_tree[4, tree] = linear_indices[1, end - cell_x + 1, cell_z, + target] - 1 + tree_to_face[4, tree] = 6 # first face dimensions are oppositely oriented, add 6 + elseif direction == 2 + target = 6 + tree_to_tree[4, tree] = linear_indices[end, cell_x, cell_z, target] - 1 + tree_to_face[4, tree] = 1 + elseif direction == 3 + target = 6 + tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z, target] - 1 + tree_to_face[4, tree] = 2 + elseif direction == 4 + target = 6 + tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, + target] - 1 + tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 + elseif direction == 5 + target = 4 + tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z, target] - 1 + tree_to_face[4, tree] = 2 + else # direction == 6 + target = 4 + tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, + target] - 1 + tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 + end + + # Negative z-direction + if cell_z > 1 + tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1, + direction] - 1 + tree_to_face[5, tree] = 5 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[5, tree] = tree - 1 + tree_to_face[5, tree] = 4 + end + + # Positive z-direction + if cell_z < n_cells_z + tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1, + direction] - 1 + tree_to_face[6, tree] = 4 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[6, tree] = tree - 1 + tree_to_face[6, tree] = 5 + end + end end - end - - tree_to_edge = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need edge connectivity, so this is a trivial case. - ett_offset = zeros(p4est_topidx_t, 1) - edge_to_tree = C_NULL - edge_to_edge = C_NULL - - tree_to_corner = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need corner connectivity, so this is a trivial case. 
- ctt_offset = zeros(p4est_topidx_t, 1) - - corner_to_tree = C_NULL - corner_to_corner = C_NULL - - connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, - vertices, tree_to_vertex, - tree_to_tree, tree_to_face, - tree_to_edge, ett_offset, - edge_to_tree, edge_to_edge, - tree_to_corner, ctt_offset, - corner_to_tree, corner_to_corner) - - @assert p8est_connectivity_is_valid(connectivity) == 1 - - return connectivity -end + tree_to_edge = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need edge connectivity, so this is a trivial case. + ett_offset = zeros(p4est_topidx_t, 1) + edge_to_tree = C_NULL + edge_to_edge = C_NULL + + tree_to_corner = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need corner connectivity, so this is a trivial case. + ctt_offset = zeros(p4est_topidx_t, 1) + + corner_to_tree = C_NULL + corner_to_corner = C_NULL + + connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, + vertices, tree_to_vertex, + tree_to_tree, tree_to_face, + tree_to_edge, ett_offset, + edge_to_tree, edge_to_edge, + tree_to_corner, ctt_offset, + corner_to_tree, corner_to_corner) + + @assert p8est_connectivity_is_valid(connectivity) == 1 + + return connectivity +end # Calculate physical coordinates of each node of a structured mesh. # This function assumes a structured mesh with trees in row order. # 2D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, nodes, mapping, trees_per_dimension) - linear_indices = LinearIndices(trees_per_dimension) - - # Get cell length in reference mesh - dx = 2 / trees_per_dimension[1] - dy = 2 / trees_per_dimension[2] - - for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] - tree_id = linear_indices[cell_x, cell_y] + linear_indices = LinearIndices(trees_per_dimension) - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 + # Get cell length in reference mesh + dx = 2 / trees_per_dimension[1] + dy = 2 / trees_per_dimension[2] - for j in eachindex(nodes), i in eachindex(nodes) - # node_coordinates are the mapped reference node coordinates - node_coordinates[:, i, j, tree_id] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j]) + for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] + tree_id = linear_indices[cell_x, cell_y] + + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 + + for j in eachindex(nodes), i in eachindex(nodes) + # node_coordinates are the mapped reference node coordinates + node_coordinates[:, i, j, tree_id] .= mapping(cell_x_offset + + dx / 2 * nodes[i], + cell_y_offset + + dy / 2 * nodes[j]) + end end - end end # 3D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, nodes, mapping, trees_per_dimension) - linear_indices = LinearIndices(trees_per_dimension) - - # Get cell length in reference mesh - dx = 2 / trees_per_dimension[1] - dy = 2 / trees_per_dimension[2] - dz = 2 / trees_per_dimension[3] - - for cell_z in 1:trees_per_dimension[3], - cell_y in 1:trees_per_dimension[2], - cell_x in 1:trees_per_dimension[1] - - tree_id = linear_indices[cell_x, cell_y, cell_z] - - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + 
dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 - cell_z_offset = -1 + (cell_z-1) * dz + dz/2 - - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - # node_coordinates are the mapped reference node coordinates - node_coordinates[:, i, j, k, tree_id] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j], - cell_z_offset + dz/2 * nodes[k]) + linear_indices = LinearIndices(trees_per_dimension) + + # Get cell length in reference mesh + dx = 2 / trees_per_dimension[1] + dy = 2 / trees_per_dimension[2] + dz = 2 / trees_per_dimension[3] + + for cell_z in 1:trees_per_dimension[3], + cell_y in 1:trees_per_dimension[2], + cell_x in 1:trees_per_dimension[1] + + tree_id = linear_indices[cell_x, cell_y, cell_z] + + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 + cell_z_offset = -1 + (cell_z - 1) * dz + dz / 2 + + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + # node_coordinates are the mapped reference node coordinates + node_coordinates[:, i, j, k, tree_id] .= mapping(cell_x_offset + + dx / 2 * nodes[i], + cell_y_offset + + dy / 2 * nodes[j], + cell_z_offset + + dz / 2 * nodes[k]) + end end - end end - # Calculate physical coordinates of each node of an unstructured mesh. # Extract corners of each tree from the connectivity, # interpolate to requested interpolation nodes, @@ -1031,389 +1065,408 @@ end # 2D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{RealT, 4}, nodes, mapping, - vertices, tree_to_vertex) where RealT - nodes_in = [-1.0, 1.0] - matrix = polynomial_interpolation_matrix(nodes_in, nodes) - data_in = Array{RealT, 3}(undef, 2, 2, 2) - tmp1 = zeros(RealT, 2, length(nodes), length(nodes_in)) - - for tree in 1:size(tree_to_vertex, 2) - # Tree vertices are stored in Z-order, ignore z-coordinate in 2D, zero-based indexing - @views data_in[:, 1, 1] .= vertices[1:2, tree_to_vertex[1, tree] + 1] - @views data_in[:, 2, 1] .= vertices[1:2, tree_to_vertex[2, tree] + 1] - @views data_in[:, 1, 2] .= vertices[1:2, tree_to_vertex[3, tree] + 1] - @views data_in[:, 2, 2] .= vertices[1:2, tree_to_vertex[4, tree] + 1] - - # Interpolate corner coordinates to specified nodes - multiply_dimensionwise!( - view(node_coordinates, :, :, :, tree), - matrix, matrix, - data_in, - tmp1 - ) - end - - map_node_coordinates!(node_coordinates, mapping) + vertices, tree_to_vertex) where {RealT} + nodes_in = [-1.0, 1.0] + matrix = polynomial_interpolation_matrix(nodes_in, nodes) + data_in = Array{RealT, 3}(undef, 2, 2, 2) + tmp1 = zeros(RealT, 2, length(nodes), length(nodes_in)) + + for tree in 1:size(tree_to_vertex, 2) + # Tree vertices are stored in Z-order, ignore z-coordinate in 2D, zero-based indexing + @views data_in[:, 1, 1] .= vertices[1:2, tree_to_vertex[1, tree] + 1] + @views data_in[:, 2, 1] .= vertices[1:2, tree_to_vertex[2, tree] + 1] + @views data_in[:, 1, 2] .= vertices[1:2, tree_to_vertex[3, tree] + 1] + @views data_in[:, 2, 2] .= vertices[1:2, tree_to_vertex[4, tree] + 1] + + # Interpolate corner coordinates to specified nodes + multiply_dimensionwise!(view(node_coordinates, :, :, :, tree), + matrix, matrix, + data_in, + tmp1) + end + + map_node_coordinates!(node_coordinates, mapping) end function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, mapping) - for tree in axes(node_coordinates, 4), - j in axes(node_coordinates, 3), - i in axes(node_coordinates, 2) + for tree in 
axes(node_coordinates, 4), + j in axes(node_coordinates, 3), + i in axes(node_coordinates, 2) - node_coordinates[:, i, j, tree] .= mapping(node_coordinates[1, i, j, tree], - node_coordinates[2, i, j, tree]) - end + node_coordinates[:, i, j, tree] .= mapping(node_coordinates[1, i, j, tree], + node_coordinates[2, i, j, tree]) + end - return node_coordinates + return node_coordinates end -function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, mapping::Nothing) - return node_coordinates +function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, + mapping::Nothing) + return node_coordinates end # 3D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{RealT, 5}, nodes, mapping, - vertices, tree_to_vertex) where RealT - nodes_in = [-1.0, 1.0] - matrix = polynomial_interpolation_matrix(nodes_in, nodes) - data_in = Array{RealT, 4}(undef, 3, 2, 2, 2) - - for tree in 1:size(tree_to_vertex, 2) - # Tree vertices are stored in Z-order, zero-based indexing - @views data_in[:, 1, 1, 1] .= vertices[:, tree_to_vertex[1, tree] + 1] - @views data_in[:, 2, 1, 1] .= vertices[:, tree_to_vertex[2, tree] + 1] - @views data_in[:, 1, 2, 1] .= vertices[:, tree_to_vertex[3, tree] + 1] - @views data_in[:, 2, 2, 1] .= vertices[:, tree_to_vertex[4, tree] + 1] - @views data_in[:, 1, 1, 2] .= vertices[:, tree_to_vertex[5, tree] + 1] - @views data_in[:, 2, 1, 2] .= vertices[:, tree_to_vertex[6, tree] + 1] - @views data_in[:, 1, 2, 2] .= vertices[:, tree_to_vertex[7, tree] + 1] - @views data_in[:, 2, 2, 2] .= vertices[:, tree_to_vertex[8, tree] + 1] - - # Interpolate corner coordinates to specified nodes - multiply_dimensionwise!( - view(node_coordinates, :, :, :, :, tree), - matrix, matrix, matrix, - data_in - ) - end - - map_node_coordinates!(node_coordinates, mapping) + vertices, tree_to_vertex) where {RealT} + nodes_in = [-1.0, 1.0] + matrix = polynomial_interpolation_matrix(nodes_in, nodes) + data_in = Array{RealT, 4}(undef, 3, 2, 2, 2) + + for tree in 1:size(tree_to_vertex, 2) + # Tree vertices are stored in Z-order, zero-based indexing + @views data_in[:, 1, 1, 1] .= vertices[:, tree_to_vertex[1, tree] + 1] + @views data_in[:, 2, 1, 1] .= vertices[:, tree_to_vertex[2, tree] + 1] + @views data_in[:, 1, 2, 1] .= vertices[:, tree_to_vertex[3, tree] + 1] + @views data_in[:, 2, 2, 1] .= vertices[:, tree_to_vertex[4, tree] + 1] + @views data_in[:, 1, 1, 2] .= vertices[:, tree_to_vertex[5, tree] + 1] + @views data_in[:, 2, 1, 2] .= vertices[:, tree_to_vertex[6, tree] + 1] + @views data_in[:, 1, 2, 2] .= vertices[:, tree_to_vertex[7, tree] + 1] + @views data_in[:, 2, 2, 2] .= vertices[:, tree_to_vertex[8, tree] + 1] + + # Interpolate corner coordinates to specified nodes + multiply_dimensionwise!(view(node_coordinates, :, :, :, :, tree), + matrix, matrix, matrix, + data_in) + end + + map_node_coordinates!(node_coordinates, mapping) end function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, mapping) - for tree in axes(node_coordinates, 5), - k in axes(node_coordinates, 4), - j in axes(node_coordinates, 3), - i in axes(node_coordinates, 2) - - node_coordinates[:, i, j, k, tree] .= mapping(node_coordinates[1, i, j, k, tree], - node_coordinates[2, i, j, k, tree], - node_coordinates[3, i, j, k, tree]) - end + for tree in axes(node_coordinates, 5), + k in axes(node_coordinates, 4), + j in axes(node_coordinates, 3), + i in axes(node_coordinates, 2) + + node_coordinates[:, i, j, k, tree] .= mapping(node_coordinates[1, i, j, k, + tree], + 
node_coordinates[2, i, j, k, + tree], + node_coordinates[3, i, j, k, + tree]) + end - return node_coordinates + return node_coordinates end -function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, mapping::Nothing) - return node_coordinates +function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, + mapping::Nothing) + return node_coordinates end - # Calculate physical coordinates of each node of a cubed sphere mesh. function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, - nodes, trees_per_face_dimension, layers, inner_radius, thickness) - n_cells_x = n_cells_y = trees_per_face_dimension - n_cells_z = layers - - linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z, 6)) - - # Get cell length in reference mesh - dx = 2 / n_cells_x - dy = 2 / n_cells_y - dz = 2 / n_cells_z - - for direction in 1:6 - for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y, cell_z, direction] - - x_offset = -1 + (cell_x - 1) * dx + dx/2 - y_offset = -1 + (cell_y - 1) * dy + dy/2 - z_offset = -1 + (cell_z - 1) * dz + dz/2 - - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - # node_coordinates are the mapped reference node coordinates - node_coordinates[:, i, j, k, tree] .= cubed_sphere_mapping( - x_offset + dx/2 * nodes[i], - y_offset + dy/2 * nodes[j], - z_offset + dz/2 * nodes[k], - inner_radius, thickness, direction) - end + nodes, trees_per_face_dimension, layers, + inner_radius, thickness) + n_cells_x = n_cells_y = trees_per_face_dimension + n_cells_z = layers + + linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z, 6)) + + # Get cell length in reference mesh + dx = 2 / n_cells_x + dy = 2 / n_cells_y + dz = 2 / n_cells_z + + for direction in 1:6 + for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y, cell_z, direction] + + x_offset = -1 + (cell_x - 1) * dx + dx / 2 + y_offset = -1 + (cell_y - 1) * dy + dy / 2 + z_offset = -1 + (cell_z - 1) * dz + dz / 2 + + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + # node_coordinates are the mapped reference node coordinates + node_coordinates[:, i, j, k, tree] .= cubed_sphere_mapping(x_offset + + dx / 2 * + nodes[i], + y_offset + + dy / 2 * + nodes[j], + z_offset + + dz / 2 * + nodes[k], + inner_radius, + thickness, + direction) + end + end end - end end # Map the computational coordinates xi, eta, zeta to the specified side of a cubed sphere # with the specified inner radius and thickness. 
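# The equiangular mapping summarized above can be checked in isolation. Below is a
# minimal, self-contained sketch of the +z face (direction 6) only; the helper name
# `cubed_sphere_plus_z` is hypothetical and follows the same conventions as
# `cubed_sphere_mapping` below.

using StaticArrays, LinearAlgebra

function cubed_sphere_plus_z(xi, eta, zeta, inner_radius, thickness)
    x = tan(xi * pi / 4)    # equiangular projection onto the cube face
    y = tan(eta * pi / 4)
    r = sqrt(1 + x^2 + y^2) # distance of the cube-surface point from the origin
    R = inner_radius + thickness * 0.5 * (zeta + 1) # target shell radius
    return R / r * SVector(x, y, 1)
end

# zeta = 1 lands exactly on the outer shell of radius inner_radius + thickness
@assert norm(cubed_sphere_plus_z(0.3, -0.7, 1.0, 1.0, 0.5)) ≈ 1.5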
function cubed_sphere_mapping(xi, eta, zeta, inner_radius, thickness, direction) - alpha = xi * pi/4 - beta = eta * pi/4 + alpha = xi * pi / 4 + beta = eta * pi / 4 - # Equiangular projection - x = tan(alpha) - y = tan(beta) + # Equiangular projection + x = tan(alpha) + y = tan(beta) - # Coordinates on unit cube per direction, see illustration above in the function connectivity_cubed_sphere - cube_coordinates = (SVector(-1, -x, y), - SVector( 1, x, y), - SVector( x, -1, y), - SVector(-x, 1, y), - SVector(-x, y, -1), - SVector( x, y, 1)) + # Coordinates on unit cube per direction, see illustration above in the function connectivity_cubed_sphere + cube_coordinates = (SVector(-1, -x, y), + SVector(1, x, y), + SVector(x, -1, y), + SVector(-x, 1, y), + SVector(-x, y, -1), + SVector(x, y, 1)) - # Radius on cube surface - r = sqrt(1 + x^2 + y^2) + # Radius on cube surface + r = sqrt(1 + x^2 + y^2) - # Radius of the sphere - R = inner_radius + thickness * (0.5 * (zeta + 1)) + # Radius of the sphere + R = inner_radius + thickness * (0.5 * (zeta + 1)) - # Projection onto the sphere - return R / r * cube_coordinates[direction] + # Projection onto the sphere + return R / r * cube_coordinates[direction] end - # Calculate physical coordinates of each element of an unstructured mesh read # in from a HOHQMesh file. This calculation is done with the transfinite interpolation # routines found in `mappings_geometry_curved_2d.jl` or `mappings_geometry_straight_2d.jl` function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, file_lines::Vector{String}, nodes, vertices, RealT) - # Get the number of trees and the number of interpolation nodes - n_trees = last(size(node_coordinates)) - nnodes = length(nodes) - - # Setup the starting file index to read in element indices and the additional - # curved boundary information provided by HOHQMesh. - file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 - - # Create a work set of Gamma curves to create the node coordinates - CurvedSurfaceT = CurvedSurface{RealT} - surface_curves = Array{CurvedSurfaceT}(undef, 4) - - # Create other work arrays to perform the mesh construction - element_node_ids = Array{Int}(undef, 4) - curved_check = Vector{Int}(undef, 4) - quad_vertices = Array{RealT}(undef, (4, 2)) - quad_vertices_flipped = Array{RealT}(undef, (4, 2)) - curve_values = Array{RealT}(undef, (nnodes, 2)) - - # Create the barycentric weights used for the surface interpolations - bary_weights_ = barycentric_weights(nodes) - bary_weights = SVector{nnodes}(bary_weights_) - - # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. 
- # When we extract information from the `current_line` we start at index 2 in order to - # avoid the Abaqus comment character "** " - for tree in 1:n_trees - # Pull the vertex node IDs - current_line = split(file_lines[file_idx]) - element_node_ids[1] = parse(Int, current_line[2]) - element_node_ids[2] = parse(Int, current_line[3]) - element_node_ids[3] = parse(Int, current_line[4]) - element_node_ids[4] = parse(Int, current_line[5]) - - # Pull the (x,y) values of the four vertices of the current tree out of the global vertices array - for i in 1:4 - quad_vertices[i, :] .= vertices[1:2, element_node_ids[i]] - end - # Pull the information to check if boundary is curved in order to read in additional data - file_idx += 1 - current_line = split(file_lines[file_idx]) - curved_check[1] = parse(Int, current_line[2]) - curved_check[2] = parse(Int, current_line[3]) - curved_check[3] = parse(Int, current_line[4]) - curved_check[4] = parse(Int, current_line[5]) - if sum(curved_check) == 0 - # Create the node coordinates on this particular element - calc_node_coordinates!(node_coordinates, tree, nodes, quad_vertices) - else - # Quadrilateral element has at least one curved side - # Flip node ordering to make sure the element is right-handed for the interpolations - m1 = 1 - m2 = 2 - @views quad_vertices_flipped[1, :] .= quad_vertices[4, :] - @views quad_vertices_flipped[2, :] .= quad_vertices[2, :] - @views quad_vertices_flipped[3, :] .= quad_vertices[3, :] - @views quad_vertices_flipped[4, :] .= quad_vertices[1, :] - for i in 1:4 - if curved_check[i] == 0 - # When curved_check[i] is 0 then the "curve" from vertex `i` to vertex `i+1` is a straight line. - # Evaluate a linear interpolant between the two points at each of the nodes. - for k in 1:nnodes - curve_values[k, 1] = linear_interpolate(nodes[k], quad_vertices_flipped[m1, 1], quad_vertices_flipped[m2, 1]) - curve_values[k, 2] = linear_interpolate(nodes[k], quad_vertices_flipped[m1, 2], quad_vertices_flipped[m2, 2]) - end - else - # When curved_check[i] is 1 this curved boundary information is supplied by the mesh - # generator. So we just read it into a work array - for k in 1:nnodes - file_idx += 1 - current_line = split(file_lines[file_idx]) - curve_values[k, 1] = parse(RealT,current_line[2]) - curve_values[k, 2] = parse(RealT,current_line[3]) - end + # Get the number of trees and the number of interpolation nodes + n_trees = last(size(node_coordinates)) + nnodes = length(nodes) + + # Setup the starting file index to read in element indices and the additional + # curved boundary information provided by HOHQMesh. + file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 + + # Create a work set of Gamma curves to create the node coordinates + CurvedSurfaceT = CurvedSurface{RealT} + surface_curves = Array{CurvedSurfaceT}(undef, 4) + + # Create other work arrays to perform the mesh construction + element_node_ids = Array{Int}(undef, 4) + curved_check = Vector{Int}(undef, 4) + quad_vertices = Array{RealT}(undef, (4, 2)) + quad_vertices_flipped = Array{RealT}(undef, (4, 2)) + curve_values = Array{RealT}(undef, (nnodes, 2)) + + # Create the barycentric weights used for the surface interpolations + bary_weights_ = barycentric_weights(nodes) + bary_weights = SVector{nnodes}(bary_weights_) + + # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. 
+ # When we extract information from the `current_line` we start at index 2 in order to + # avoid the Abaqus comment character "** " + for tree in 1:n_trees + # Pull the vertex node IDs + current_line = split(file_lines[file_idx]) + element_node_ids[1] = parse(Int, current_line[2]) + element_node_ids[2] = parse(Int, current_line[3]) + element_node_ids[3] = parse(Int, current_line[4]) + element_node_ids[4] = parse(Int, current_line[5]) + + # Pull the (x,y) values of the four vertices of the current tree out of the global vertices array + for i in 1:4 + quad_vertices[i, :] .= vertices[1:2, element_node_ids[i]] end - # Construct the curve interpolant for the current side - surface_curves[i] = CurvedSurfaceT(nodes, bary_weights, copy(curve_values)) - # Indexing update that contains a "flip" to ensure correct element orientation. - # If we need to construct the straight line "curves" when curved_check[i] == 0 - m1 += 1 - if i == 3 - m2 = 1 + # Pull the information to check if boundary is curved in order to read in additional data + file_idx += 1 + current_line = split(file_lines[file_idx]) + curved_check[1] = parse(Int, current_line[2]) + curved_check[2] = parse(Int, current_line[3]) + curved_check[3] = parse(Int, current_line[4]) + curved_check[4] = parse(Int, current_line[5]) + if sum(curved_check) == 0 + # Create the node coordinates on this particular element + calc_node_coordinates!(node_coordinates, tree, nodes, quad_vertices) else - m2 += 1 + # Quadrilateral element has at least one curved side + # Flip node ordering to make sure the element is right-handed for the interpolations + m1 = 1 + m2 = 2 + @views quad_vertices_flipped[1, :] .= quad_vertices[4, :] + @views quad_vertices_flipped[2, :] .= quad_vertices[2, :] + @views quad_vertices_flipped[3, :] .= quad_vertices[3, :] + @views quad_vertices_flipped[4, :] .= quad_vertices[1, :] + for i in 1:4 + if curved_check[i] == 0 + # When curved_check[i] is 0 then the "curve" from vertex `i` to vertex `i+1` is a straight line. + # Evaluate a linear interpolant between the two points at each of the nodes. + for k in 1:nnodes + curve_values[k, 1] = linear_interpolate(nodes[k], + quad_vertices_flipped[m1, + 1], + quad_vertices_flipped[m2, + 1]) + curve_values[k, 2] = linear_interpolate(nodes[k], + quad_vertices_flipped[m1, + 2], + quad_vertices_flipped[m2, + 2]) + end + else + # When curved_check[i] is 1 this curved boundary information is supplied by the mesh + # generator. So we just read it into a work array + for k in 1:nnodes + file_idx += 1 + current_line = split(file_lines[file_idx]) + curve_values[k, 1] = parse(RealT, current_line[2]) + curve_values[k, 2] = parse(RealT, current_line[3]) + end + end + # Construct the curve interpolant for the current side + surface_curves[i] = CurvedSurfaceT(nodes, bary_weights, + copy(curve_values)) + # Indexing update that contains a "flip" to ensure correct element orientation. 
+ # If we need to construct the straight line "curves" when curved_check[i] == 0 + m1 += 1 + if i == 3 + m2 = 1 + else + m2 += 1 + end + end + # Create the node coordinates on this particular element + calc_node_coordinates!(node_coordinates, tree, nodes, surface_curves) end - end - # Create the node coordinates on this particular element - calc_node_coordinates!(node_coordinates, tree, nodes, surface_curves) + # Move file index to the next tree + file_idx += 1 end - # Move file index to the next tree - file_idx += 1 - end - return file_idx + return file_idx end - # Calculate physical coordinates of each element of an unstructured mesh read # in from a HOHQMesh file. This calculation is done with the transfinite interpolation # routines found in `transfinite_mappings_3d.jl` function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, file_lines::Vector{String}, nodes, vertices, RealT) - # Get the number of trees and the number of interpolation nodes - n_trees = last(size(node_coordinates)) - nnodes = length(nodes) - - # Setup the starting file index to read in element indices and the additional - # curved boundary information provided by HOHQMesh. - file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 - - # Create a work set of Gamma curves to create the node coordinates - CurvedFaceT = CurvedFace{RealT} - face_curves = Array{CurvedFaceT}(undef, 6) - - # Create other work arrays to perform the mesh construction - element_node_ids = Array{Int}(undef, 8) - curved_check = Vector{Int}(undef, 6) - hex_vertices = Array{RealT}(undef, (3, 8)) - face_vertices = Array{RealT}(undef, (3, 4)) - curve_values = Array{RealT}(undef, (3, nnodes, nnodes)) - - # Create the barycentric weights used for the surface interpolations - bary_weights_ = barycentric_weights(nodes) - bary_weights = SVector{nnodes}(bary_weights_) - - # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. - # When we extract information from the `current_line` we start at index 2 in order to - # avoid the Abaqus comment character "** " - for tree in 1:n_trees - # pull the vertex node IDs - current_line = split(file_lines[file_idx]) - element_node_ids[1] = parse(Int, current_line[2]) - element_node_ids[2] = parse(Int, current_line[3]) - element_node_ids[3] = parse(Int, current_line[4]) - element_node_ids[4] = parse(Int, current_line[5]) - element_node_ids[5] = parse(Int, current_line[6]) - element_node_ids[6] = parse(Int, current_line[7]) - element_node_ids[7] = parse(Int, current_line[8]) - element_node_ids[8] = parse(Int, current_line[9]) - - # Pull the (x, y, z) values of the eight vertices of the current tree out of the global vertices array - for i in 1:8 - hex_vertices[:, i] .= vertices[:, element_node_ids[i]] - end - # Pull the information to check if boundary is curved in order to read in additional data - file_idx += 1 - current_line = split(file_lines[file_idx]) - curved_check[1] = parse(Int, current_line[2]) - curved_check[2] = parse(Int, current_line[3]) - curved_check[3] = parse(Int, current_line[4]) - curved_check[4] = parse(Int, current_line[5]) - curved_check[5] = parse(Int, current_line[6]) - curved_check[6] = parse(Int, current_line[7]) - if sum(curved_check) == 0 - # Create the node coordinates on this element - calc_node_coordinates!(node_coordinates, tree, nodes, hex_vertices) - else - # Hexahedral element has at least one curved side - for face in 1:6 - if curved_check[face] == 0 - # Face is a flat plane. 
Evaluate a bilinear interpolant between the four vertices of the face at each of the nodes. - get_vertices_for_bilinear_interpolant!(face_vertices, face, hex_vertices) - for q in 1:nnodes, p in 1:nnodes - @views bilinear_interpolation!(curve_values[:, p, q], face_vertices, nodes[p], nodes[q]) - end - else # curved_check[face] == 1 - # Curved face boundary information is supplied by the mesh file. Just read it into a work array - for q in 1:nnodes, p in 1:nnodes - file_idx += 1 - current_line = split(file_lines[file_idx]) - curve_values[1, p, q] = parse(RealT,current_line[2]) - curve_values[2, p, q] = parse(RealT,current_line[3]) - curve_values[3, p, q] = parse(RealT,current_line[4]) - end + # Get the number of trees and the number of interpolation nodes + n_trees = last(size(node_coordinates)) + nnodes = length(nodes) + + # Setup the starting file index to read in element indices and the additional + # curved boundary information provided by HOHQMesh. + file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 + + # Create a work set of Gamma curves to create the node coordinates + CurvedFaceT = CurvedFace{RealT} + face_curves = Array{CurvedFaceT}(undef, 6) + + # Create other work arrays to perform the mesh construction + element_node_ids = Array{Int}(undef, 8) + curved_check = Vector{Int}(undef, 6) + hex_vertices = Array{RealT}(undef, (3, 8)) + face_vertices = Array{RealT}(undef, (3, 4)) + curve_values = Array{RealT}(undef, (3, nnodes, nnodes)) + + # Create the barycentric weights used for the surface interpolations + bary_weights_ = barycentric_weights(nodes) + bary_weights = SVector{nnodes}(bary_weights_) + + # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. + # When we extract information from the `current_line` we start at index 2 in order to + # avoid the Abaqus comment character "** " + for tree in 1:n_trees + # pull the vertex node IDs + current_line = split(file_lines[file_idx]) + element_node_ids[1] = parse(Int, current_line[2]) + element_node_ids[2] = parse(Int, current_line[3]) + element_node_ids[3] = parse(Int, current_line[4]) + element_node_ids[4] = parse(Int, current_line[5]) + element_node_ids[5] = parse(Int, current_line[6]) + element_node_ids[6] = parse(Int, current_line[7]) + element_node_ids[7] = parse(Int, current_line[8]) + element_node_ids[8] = parse(Int, current_line[9]) + + # Pull the (x, y, z) values of the eight vertices of the current tree out of the global vertices array + for i in 1:8 + hex_vertices[:, i] .= vertices[:, element_node_ids[i]] + end + # Pull the information to check if boundary is curved in order to read in additional data + file_idx += 1 + current_line = split(file_lines[file_idx]) + curved_check[1] = parse(Int, current_line[2]) + curved_check[2] = parse(Int, current_line[3]) + curved_check[3] = parse(Int, current_line[4]) + curved_check[4] = parse(Int, current_line[5]) + curved_check[5] = parse(Int, current_line[6]) + curved_check[6] = parse(Int, current_line[7]) + if sum(curved_check) == 0 + # Create the node coordinates on this element + calc_node_coordinates!(node_coordinates, tree, nodes, hex_vertices) + else + # Hexahedral element has at least one curved side + for face in 1:6 + if curved_check[face] == 0 + # Face is a flat plane. + # Evaluate a bilinear interpolant between the four vertices + # of the face at each of the nodes. 
+ get_vertices_for_bilinear_interpolant!(face_vertices, face, + hex_vertices) + for q in 1:nnodes, p in 1:nnodes + @views bilinear_interpolation!(curve_values[:, p, q], + face_vertices, nodes[p], + nodes[q]) + end + else # curved_check[face] == 1 + # Curved face boundary information is supplied by + # the mesh file. Just read it into a work array + for q in 1:nnodes, p in 1:nnodes + file_idx += 1 + current_line = split(file_lines[file_idx]) + curve_values[1, p, q] = parse(RealT, current_line[2]) + curve_values[2, p, q] = parse(RealT, current_line[3]) + curve_values[3, p, q] = parse(RealT, current_line[4]) + end + end + # Construct the curve interpolant for the current side + face_curves[face] = CurvedFaceT(nodes, bary_weights, copy(curve_values)) + end + # Create the node coordinates on this particular element + calc_node_coordinates!(node_coordinates, tree, nodes, face_curves) end - # Construct the curve interpolant for the current side - face_curves[face] = CurvedFaceT(nodes, bary_weights, copy(curve_values)) - end - # Create the node coordinates on this particular element - calc_node_coordinates!(node_coordinates, tree, nodes, face_curves) + # Move file index to the next tree + file_idx += 1 end - # Move file index to the next tree - file_idx += 1 - end - return file_idx + return file_idx end - # Given the eight `hex_vertices` for a hexahedral element extract # the four `face_vertices` for a particular `face_index`. function get_vertices_for_bilinear_interpolant!(face_vertices, face_index, hex_vertices) - if face_index == 1 - @views face_vertices[:, 1] .= hex_vertices[:, 1] - @views face_vertices[:, 2] .= hex_vertices[:, 2] - @views face_vertices[:, 3] .= hex_vertices[:, 6] - @views face_vertices[:, 4] .= hex_vertices[:, 5] - elseif face_index == 2 - @views face_vertices[:, 1] .= hex_vertices[:, 4] - @views face_vertices[:, 2] .= hex_vertices[:, 3] - @views face_vertices[:, 3] .= hex_vertices[:, 7] - @views face_vertices[:, 4] .= hex_vertices[:, 8] - elseif face_index == 3 - @views face_vertices[:, 1] .= hex_vertices[:, 1] - @views face_vertices[:, 2] .= hex_vertices[:, 2] - @views face_vertices[:, 3] .= hex_vertices[:, 3] - @views face_vertices[:, 4] .= hex_vertices[:, 4] - elseif face_index == 4 - @views face_vertices[:, 1] .= hex_vertices[:, 2] - @views face_vertices[:, 2] .= hex_vertices[:, 3] - @views face_vertices[:, 3] .= hex_vertices[:, 6] - @views face_vertices[:, 4] .= hex_vertices[:, 7] - elseif face_index == 5 - @views face_vertices[:, 1] .= hex_vertices[:, 5] - @views face_vertices[:, 2] .= hex_vertices[:, 6] - @views face_vertices[:, 3] .= hex_vertices[:, 7] - @views face_vertices[:, 4] .= hex_vertices[:, 8] - else # face_index == 6 - @views face_vertices[:, 1] .= hex_vertices[:, 1] - @views face_vertices[:, 2] .= hex_vertices[:, 4] - @views face_vertices[:, 3] .= hex_vertices[:, 8] - @views face_vertices[:, 4] .= hex_vertices[:, 5] - end + if face_index == 1 + @views face_vertices[:, 1] .= hex_vertices[:, 1] + @views face_vertices[:, 2] .= hex_vertices[:, 2] + @views face_vertices[:, 3] .= hex_vertices[:, 6] + @views face_vertices[:, 4] .= hex_vertices[:, 5] + elseif face_index == 2 + @views face_vertices[:, 1] .= hex_vertices[:, 4] + @views face_vertices[:, 2] .= hex_vertices[:, 3] + @views face_vertices[:, 3] .= hex_vertices[:, 7] + @views face_vertices[:, 4] .= hex_vertices[:, 8] + elseif face_index == 3 + @views face_vertices[:, 1] .= hex_vertices[:, 1] + @views face_vertices[:, 2] .= hex_vertices[:, 2] + @views face_vertices[:, 3] .= hex_vertices[:, 3] + @views 
face_vertices[:, 4] .= hex_vertices[:, 4] + elseif face_index == 4 + @views face_vertices[:, 1] .= hex_vertices[:, 2] + @views face_vertices[:, 2] .= hex_vertices[:, 3] + @views face_vertices[:, 3] .= hex_vertices[:, 6] + @views face_vertices[:, 4] .= hex_vertices[:, 7] + elseif face_index == 5 + @views face_vertices[:, 1] .= hex_vertices[:, 5] + @views face_vertices[:, 2] .= hex_vertices[:, 6] + @views face_vertices[:, 3] .= hex_vertices[:, 7] + @views face_vertices[:, 4] .= hex_vertices[:, 8] + else # face_index == 6 + @views face_vertices[:, 1] .= hex_vertices[:, 1] + @views face_vertices[:, 2] .= hex_vertices[:, 4] + @views face_vertices[:, 3] .= hex_vertices[:, 8] + @views face_vertices[:, 4] .= hex_vertices[:, 5] + end end - # Evaluate a bilinear interpolant at a point (u,v) given the four vertices where the face is right-handed # 4 3 # o----------------o @@ -1427,293 +1480,330 @@ end # 1 2 # and return the 3D coordinate point (x, y, z) function bilinear_interpolation!(coordinate, face_vertices, u, v) - for j in 1:3 - coordinate[j] = 0.25 * ( face_vertices[j,1] * (1 - u) * (1 - v) - + face_vertices[j,2] * (1 + u) * (1 - v) - + face_vertices[j,3] * (1 + u) * (1 + v) - + face_vertices[j,4] * (1 - u) * (1 + v) ) - end + for j in 1:3 + coordinate[j] = 0.25 * (face_vertices[j, 1] * (1 - u) * (1 - v) + + face_vertices[j, 2] * (1 + u) * (1 - v) + + face_vertices[j, 3] * (1 + u) * (1 + v) + + face_vertices[j, 4] * (1 - u) * (1 + v)) + end end - -function balance!(mesh::P4estMesh{2}, init_fn=C_NULL) - p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) - # Due to a bug in `p4est`, the forest needs to be rebalanced twice sometimes - # See https://github.com/cburstedde/p4est/issues/112 - p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) +function balance!(mesh::P4estMesh{2}, init_fn = C_NULL) + p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) + # Due to a bug in `p4est`, the forest needs to be rebalanced twice sometimes + # See https://github.com/cburstedde/p4est/issues/112 + p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) end -function balance!(mesh::P4estMesh{3}, init_fn=C_NULL) - p8est_balance(mesh.p4est, P8EST_CONNECT_FACE, init_fn) +function balance!(mesh::P4estMesh{3}, init_fn = C_NULL) + p8est_balance(mesh.p4est, P8EST_CONNECT_FACE, init_fn) end -function partition!(mesh::P4estMesh{2}; weight_fn=C_NULL) - p4est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), weight_fn) +function partition!(mesh::P4estMesh{2}; weight_fn = C_NULL) + p4est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), + weight_fn) end -function partition!(mesh::P4estMesh{3}; weight_fn=C_NULL) - p8est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), weight_fn) +function partition!(mesh::P4estMesh{3}; weight_fn = C_NULL) + p8est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), + weight_fn) end - function update_ghost_layer!(mesh::P4estMesh) - ghost_destroy_p4est(mesh.ghost) - mesh.ghost = ghost_new_p4est(mesh.p4est) + ghost_destroy_p4est(mesh.ghost) + mesh.ghost = ghost_new_p4est(mesh.p4est) end - function init_fn(p4est, which_tree, quadrant) - # Unpack quadrant's user data ([global quad ID, controller_value]) - ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) + # Unpack quadrant's user data ([global quad ID, controller_value]) + ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) - # Initialize quad ID as -1 and controller_value as 0 (don't refine or coarsen) - unsafe_store!(ptr, -1, 1) - unsafe_store!(ptr, 0, 2) + # 
Initialize quad ID as -1 and controller_value as 0 (don't refine or coarsen) + unsafe_store!(ptr, -1, 1) + unsafe_store!(ptr, 0, 2) - return nothing + return nothing end # 2D -cfunction(::typeof(init_fn), ::Val{2}) = @cfunction(init_fn, Cvoid, (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +function cfunction(::typeof(init_fn), ::Val{2}) + @cfunction(init_fn, Cvoid, + (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +end # 3D -cfunction(::typeof(init_fn), ::Val{3}) = @cfunction(init_fn, Cvoid, (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +function cfunction(::typeof(init_fn), ::Val{3}) + @cfunction(init_fn, Cvoid, + (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +end function refine_fn(p4est, which_tree, quadrant) - # Controller value has been copied to the quadrant's user data storage before. - # Unpack quadrant's user data ([global quad ID, controller_value]). - ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) - controller_value = unsafe_load(ptr, 2) - - if controller_value > 0 - # return true (refine) - return Cint(1) - else - # return false (don't refine) - return Cint(0) - end + # Controller value has been copied to the quadrant's user data storage before. + # Unpack quadrant's user data ([global quad ID, controller_value]). + ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) + controller_value = unsafe_load(ptr, 2) + + if controller_value > 0 + # return true (refine) + return Cint(1) + else + # return false (don't refine) + return Cint(0) + end end # 2D -cfunction(::typeof(refine_fn), ::Val{2}) = @cfunction(refine_fn, Cint, (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +function cfunction(::typeof(refine_fn), ::Val{2}) + @cfunction(refine_fn, Cint, + (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +end # 3D -cfunction(::typeof(refine_fn), ::Val{3}) = @cfunction(refine_fn, Cint, (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +function cfunction(::typeof(refine_fn), ::Val{3}) + @cfunction(refine_fn, Cint, + (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +end # Refine marked cells and rebalance forest. # Return a list of all cells that have been refined during refinement or rebalancing. function refine!(mesh::P4estMesh) - # Copy original element IDs to quad user data storage - original_n_cells = ncells(mesh) - save_original_ids(mesh) + # Copy original element IDs to quad user data storage + original_n_cells = ncells(mesh) + save_original_ids(mesh) - init_fn_c = cfunction(init_fn, Val(ndims(mesh))) - refine_fn_c = cfunction(refine_fn, Val(ndims(mesh))) + init_fn_c = cfunction(init_fn, Val(ndims(mesh))) + refine_fn_c = cfunction(refine_fn, Val(ndims(mesh))) - # Refine marked cells - @trixi_timeit timer() "refine" refine_p4est!(mesh.p4est, false, refine_fn_c, init_fn_c) + # Refine marked cells + @trixi_timeit timer() "refine" refine_p4est!(mesh.p4est, false, refine_fn_c, + init_fn_c) - @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) + @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) - return collect_changed_cells(mesh, original_n_cells) + return collect_changed_cells(mesh, original_n_cells) end - function coarsen_fn(p4est, which_tree, quadrants_ptr) - quadrants = unsafe_wrap_quadrants(quadrants_ptr, p4est) - - # Controller value has been copied to the quadrant's user data storage before. - # Load controller value from quadrant's user data ([global quad ID, controller_value]). 
- controller_value(i) = unsafe_load(Ptr{Int}(unsafe_load(quadrants[i].p.user_data)), 2) - - # `p4est` calls this function for each 2^ndims quads that could be coarsened to a single one. - # Only coarsen if all these 2^ndims quads have been marked for coarsening. - if all(i -> controller_value(i) < 0, eachindex(quadrants)) - # return true (coarsen) - return Cint(1) - else - # return false (don't coarsen) - return Cint(0) - end + quadrants = unsafe_wrap_quadrants(quadrants_ptr, p4est) + + # Controller value has been copied to the quadrant's user data storage before. + # Load controller value from quadrant's user data ([global quad ID, controller_value]). + function controller_value(i) + unsafe_load(Ptr{Int}(unsafe_load(quadrants[i].p.user_data)), 2) + end + + # `p4est` calls this function for each 2^ndims quads that could be coarsened to a single one. + # Only coarsen if all these 2^ndims quads have been marked for coarsening. + if all(i -> controller_value(i) < 0, eachindex(quadrants)) + # return true (coarsen) + return Cint(1) + else + # return false (don't coarsen) + return Cint(0) + end end # 2D -unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p4est_t}) = unsafe_wrap(Array, quadrants_ptr, 4) +function unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p4est_t}) + unsafe_wrap(Array, quadrants_ptr, 4) +end # 3D -unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p8est_t}) = unsafe_wrap(Array, quadrants_ptr, 8) +function unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p8est_t}) + unsafe_wrap(Array, quadrants_ptr, 8) +end # 2D -cfunction(::typeof(coarsen_fn), ::Val{2}) = @cfunction(coarsen_fn, Cint, (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p4est_quadrant_t}})) +function cfunction(::typeof(coarsen_fn), ::Val{2}) + @cfunction(coarsen_fn, Cint, + (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p4est_quadrant_t}})) +end # 3D -cfunction(::typeof(coarsen_fn), ::Val{3}) = @cfunction(coarsen_fn, Cint, (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p8est_quadrant_t}})) +function cfunction(::typeof(coarsen_fn), ::Val{3}) + @cfunction(coarsen_fn, Cint, + (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p8est_quadrant_t}})) +end # Coarsen marked cells if the forest will stay balanced. # Return a list of all cells that have been coarsened. function coarsen!(mesh::P4estMesh) - # Copy original element IDs to quad user data storage - original_n_cells = ncells(mesh) - save_original_ids(mesh) - - # Coarsen marked cells - coarsen_fn_c = cfunction(coarsen_fn, Val(ndims(mesh))) - init_fn_c = cfunction(init_fn, Val(ndims(mesh))) - - @trixi_timeit timer() "coarsen!" coarsen_p4est!(mesh.p4est, false, coarsen_fn_c, init_fn_c) - - # IDs of newly created cells (one-based) - new_cells = collect_new_cells(mesh) - # Old IDs of cells that have been coarsened (one-based) - coarsened_cells_vec = collect_changed_cells(mesh, original_n_cells) - # 2^ndims changed cells should have been coarsened to one new cell. - # This matrix will store the IDs of all cells that have been coarsened to cell new_cells[i] - # in the i-th column. - coarsened_cells = reshape(coarsened_cells_vec, 2^ndims(mesh), length(new_cells)) - - # Save new original IDs to find out what changed after balancing - intermediate_n_cells = ncells(mesh) - save_original_ids(mesh) - - @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) - - refined_cells = collect_changed_cells(mesh, intermediate_n_cells) - - # Some cells may have been coarsened even though they unbalanced the forest. - # These cells have now been refined again by p4est_balance. 
- # refined_cells contains the intermediate IDs (ID of coarse cell - # between coarsening and balancing) of these cells. - # Find original ID of each cell that has been coarsened and then refined again. - for refined_cell in refined_cells - # i-th cell of the ones that have been created by coarsening has been refined again - i = findfirst(==(refined_cell), new_cells) - - # Remove IDs of the 2^ndims cells that have been coarsened to this cell - coarsened_cells[:, i] .= -1 - end - - # Return all IDs of cells that have been coarsened but not refined again by balancing - return coarsened_cells_vec[coarsened_cells_vec .>= 0] -end + # Copy original element IDs to quad user data storage + original_n_cells = ncells(mesh) + save_original_ids(mesh) + + # Coarsen marked cells + coarsen_fn_c = cfunction(coarsen_fn, Val(ndims(mesh))) + init_fn_c = cfunction(init_fn, Val(ndims(mesh))) + + @trixi_timeit timer() "coarsen!" coarsen_p4est!(mesh.p4est, false, coarsen_fn_c, + init_fn_c) + + # IDs of newly created cells (one-based) + new_cells = collect_new_cells(mesh) + # Old IDs of cells that have been coarsened (one-based) + coarsened_cells_vec = collect_changed_cells(mesh, original_n_cells) + # 2^ndims changed cells should have been coarsened to one new cell. + # This matrix will store the IDs of all cells that have been coarsened to cell new_cells[i] + # in the i-th column. + coarsened_cells = reshape(coarsened_cells_vec, 2^ndims(mesh), length(new_cells)) + + # Save new original IDs to find out what changed after balancing + intermediate_n_cells = ncells(mesh) + save_original_ids(mesh) + + @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) + + refined_cells = collect_changed_cells(mesh, intermediate_n_cells) + + # Some cells may have been coarsened even though they unbalanced the forest. + # These cells have now been refined again by p4est_balance. + # refined_cells contains the intermediate IDs (ID of coarse cell + # between coarsening and balancing) of these cells. + # Find original ID of each cell that has been coarsened and then refined again. 
+ for refined_cell in refined_cells + # i-th cell of the ones that have been created by coarsening has been refined again + i = findfirst(==(refined_cell), new_cells) + + # Remove IDs of the 2^ndims cells that have been coarsened to this cell + coarsened_cells[:, i] .= -1 + end + # Return all IDs of cells that have been coarsened but not refined again by balancing + return coarsened_cells_vec[coarsened_cells_vec .>= 0] +end # Copy global quad ID to quad's user data storage, will be called below function save_original_id_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_obj = unsafe_load(info) - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid + # Load tree from global trees array, one-based indexing + tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset + # Global quad ID + quad_id = offset + info_obj.quadid - # Unpack quadrant's user data ([global quad ID, controller_value]) - ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - # Save global quad ID - unsafe_store!(ptr, quad_id, 1) + # Unpack quadrant's user data ([global quad ID, controller_value]) + ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + # Save global quad ID + unsafe_store!(ptr, quad_id, 1) - return nothing + return nothing end # 2D -cfunction(::typeof(save_original_id_iter_volume), ::Val{2}) = @cfunction(save_original_id_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(save_original_id_iter_volume), ::Val{2}) + @cfunction(save_original_id_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(save_original_id_iter_volume), ::Val{3}) = @cfunction(save_original_id_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(save_original_id_iter_volume), ::Val{3}) + @cfunction(save_original_id_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end # Copy old element IDs to each quad's user data storage function save_original_ids(mesh::P4estMesh) - iter_volume_c = cfunction(save_original_id_iter_volume, Val(ndims(mesh))) + iter_volume_c = cfunction(save_original_id_iter_volume, Val(ndims(mesh))) - iterate_p4est(mesh.p4est, C_NULL; iter_volume_c=iter_volume_c) + iterate_p4est(mesh.p4est, C_NULL; iter_volume_c = iter_volume_c) end - # Extract information about which cells have been changed function collect_changed_iter_volume(info, user_data) - info_obj = unsafe_load(info) - - # The original element ID has been saved to user_data before. - # Load original quad ID from quad's user data ([global quad ID, controller_value]). - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - original_id = unsafe_load(quad_data_ptr, 1) - - # original_id of cells that have been newly created is -1 - if original_id >= 0 - # Unpack user_data = original_cells - user_data_ptr = Ptr{Int}(user_data) - - # If quad has an original_id, it existed before refinement/coarsening, - # and therefore wasn't changed. - # Mark original_id as "not changed during refinement/coarsening" in original_cells - unsafe_store!(user_data_ptr, 0, original_id + 1) - end + info_obj = unsafe_load(info) + + # The original element ID has been saved to user_data before. 
+ # Load original quad ID from quad's user data ([global quad ID, controller_value]). + quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + original_id = unsafe_load(quad_data_ptr, 1) + + # original_id of cells that have been newly created is -1 + if original_id >= 0 + # Unpack user_data = original_cells + user_data_ptr = Ptr{Int}(user_data) + + # If quad has an original_id, it existed before refinement/coarsening, + # and therefore wasn't changed. + # Mark original_id as "not changed during refinement/coarsening" in original_cells + unsafe_store!(user_data_ptr, 0, original_id + 1) + end - return nothing + return nothing end # 2D -cfunction(::typeof(collect_changed_iter_volume), ::Val{2}) = @cfunction(collect_changed_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(collect_changed_iter_volume), ::Val{2}) + @cfunction(collect_changed_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(collect_changed_iter_volume), ::Val{3}) = @cfunction(collect_changed_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(collect_changed_iter_volume), ::Val{3}) + @cfunction(collect_changed_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end function collect_changed_cells(mesh::P4estMesh, original_n_cells) - original_cells = collect(1:original_n_cells) + original_cells = collect(1:original_n_cells) - # Iterate over all quads and set original cells that haven't been changed to zero - iter_volume_c = cfunction(collect_changed_iter_volume, Val(ndims(mesh))) + # Iterate over all quads and set original cells that haven't been changed to zero + iter_volume_c = cfunction(collect_changed_iter_volume, Val(ndims(mesh))) - iterate_p4est(mesh.p4est, original_cells; iter_volume_c=iter_volume_c) + iterate_p4est(mesh.p4est, original_cells; iter_volume_c = iter_volume_c) - # Changed cells are all that haven't been set to zero above - changed_original_cells = original_cells[original_cells .> 0] + # Changed cells are all that haven't been set to zero above + changed_original_cells = original_cells[original_cells .> 0] - return changed_original_cells + return changed_original_cells end - # Extract newly created cells function collect_new_iter_volume(info, user_data) - info_obj = unsafe_load(info) - - # The original element ID has been saved to user_data before. - # Unpack quadrant's user data ([global quad ID, controller_value]). - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - original_id = unsafe_load(quad_data_ptr, 1) - - # original_id of cells that have been newly created is -1 - if original_id < 0 - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid - - # Unpack user_data = original_cells - user_data_ptr = Ptr{Int}(user_data) - - # Mark cell as "newly created during refinement/coarsening/balancing" - unsafe_store!(user_data_ptr, 1, quad_id + 1) - end + info_obj = unsafe_load(info) + + # The original element ID has been saved to user_data before. + # Unpack quadrant's user data ([global quad ID, controller_value]). 
+ quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + original_id = unsafe_load(quad_data_ptr, 1) + + # original_id of cells that have been newly created is -1 + if original_id < 0 + # Load tree from global trees array, one-based indexing + tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset + # Global quad ID + quad_id = offset + info_obj.quadid + + # Unpack user_data = original_cells + user_data_ptr = Ptr{Int}(user_data) + + # Mark cell as "newly created during refinement/coarsening/balancing" + unsafe_store!(user_data_ptr, 1, quad_id + 1) + end - return nothing + return nothing end # 2D -cfunction(::typeof(collect_new_iter_volume), ::Val{2}) = @cfunction(collect_new_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(collect_new_iter_volume), ::Val{2}) + @cfunction(collect_new_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(collect_new_iter_volume), ::Val{3}) = @cfunction(collect_new_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(collect_new_iter_volume), ::Val{3}) + @cfunction(collect_new_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end function collect_new_cells(mesh::P4estMesh) - cell_is_new = zeros(Int, ncells(mesh)) + cell_is_new = zeros(Int, ncells(mesh)) - # Iterate over all quads and set original cells that have been changed to one - iter_volume_c = cfunction(collect_new_iter_volume, Val(ndims(mesh))) + # Iterate over all quads and set original cells that have been changed to one + iter_volume_c = cfunction(collect_new_iter_volume, Val(ndims(mesh))) - iterate_p4est(mesh.p4est, cell_is_new; iter_volume_c=iter_volume_c) + iterate_p4est(mesh.p4est, cell_is_new; iter_volume_c = iter_volume_c) - # Changed cells are all that haven't been set to zero above - new_cells = findall(==(1), cell_is_new) + # Changed cells are all that haven't been set to zero above + new_cells = findall(==(1), cell_is_new) - return new_cells + return new_cells end - - end # @muladd diff --git a/src/meshes/parallel_tree.jl b/src/meshes/parallel_tree.jl index 22f9e1e6975..83d99c4d110 100644 --- a/src/meshes/parallel_tree.jl +++ b/src/meshes/parallel_tree.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Composite type that represents a NDIMS-dimensional tree (parallel version). # @@ -26,211 +26,211 @@ # way. Also, depth-first ordering *might* not by guaranteed during # refinement/coarsening operations. 
mutable struct ParallelTree{NDIMS} <: AbstractTree{NDIMS} - parent_ids::Vector{Int} - child_ids::Matrix{Int} - neighbor_ids::Matrix{Int} - levels::Vector{Int} - coordinates::Matrix{Float64} - original_cell_ids::Vector{Int} - mpi_ranks::Vector{Int} - - capacity::Int - length::Int - dummy::Int - - center_level_0::SVector{NDIMS, Float64} - length_level_0::Float64 - periodicity::NTuple{NDIMS, Bool} - - function ParallelTree{NDIMS}(capacity::Integer) where NDIMS - # Verify that NDIMS is an integer - @assert NDIMS isa Integer - - # Create instance - t = new() - - # Initialize fields with defaults - # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations - t.parent_ids = fill(typemin(Int), capacity + 1) - t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) - t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) - t.levels = fill(typemin(Int), capacity + 1) - t.coordinates = fill(NaN, NDIMS, capacity + 1) - t.original_cell_ids = fill(typemin(Int), capacity + 1) - t.mpi_ranks = fill(typemin(Int), capacity + 1) - - t.capacity = capacity - t.length = 0 - t.dummy = capacity + 1 - - t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS)) - t.length_level_0 = NaN - - return t - end + parent_ids::Vector{Int} + child_ids::Matrix{Int} + neighbor_ids::Matrix{Int} + levels::Vector{Int} + coordinates::Matrix{Float64} + original_cell_ids::Vector{Int} + mpi_ranks::Vector{Int} + + capacity::Int + length::Int + dummy::Int + + center_level_0::SVector{NDIMS, Float64} + length_level_0::Float64 + periodicity::NTuple{NDIMS, Bool} + + function ParallelTree{NDIMS}(capacity::Integer) where {NDIMS} + # Verify that NDIMS is an integer + @assert NDIMS isa Integer + + # Create instance + t = new() + + # Initialize fields with defaults + # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations + t.parent_ids = fill(typemin(Int), capacity + 1) + t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) + t.neighbor_ids = fill(typemin(Int), 2 * NDIMS, capacity + 1) + t.levels = fill(typemin(Int), capacity + 1) + t.coordinates = fill(NaN, NDIMS, capacity + 1) + t.original_cell_ids = fill(typemin(Int), capacity + 1) + t.mpi_ranks = fill(typemin(Int), capacity + 1) + + t.capacity = capacity + t.length = 0 + t.dummy = capacity + 1 + + t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS)) + t.length_level_0 = NaN + + return t + end end - # Constructor for passing the dimension as an argument -ParallelTree(::Val{NDIMS}, args...) where NDIMS = ParallelTree{NDIMS}(args...) +ParallelTree(::Val{NDIMS}, args...) where {NDIMS} = ParallelTree{NDIMS}(args...) 
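The `ParallelTree` struct reformatted above keeps the whole tree topology in flat arrays indexed by cell ID: one column per cell in `child_ids` (2^NDIMS children) and `neighbor_ids` (2*NDIMS face neighbors), so refining a cell only appends columns. A minimal standalone sketch of that storage scheme, with illustrative names only (not Trixi's API; compare the `init!` and `init_child!` methods below):

# Flat-array quadtree storage in 2D; refining a cell appends 2^NDIMS columns.
ndims_tree = 2
capacity = 16

parent_ids = fill(typemin(Int), capacity + 1)
child_ids = fill(typemin(Int), 2^ndims_tree, capacity + 1)
levels = fill(typemin(Int), capacity + 1)

# The root cell gets ID 1 (compare `init!`)
n_cells = 1
parent_ids[1] = 0
child_ids[:, 1] .= 0
levels[1] = 0

# "Refine" the root by appending its 2^NDIMS children (compare `init_child!`)
for child in 1:(2^ndims_tree)
    child_id = n_cells + child
    parent_ids[child_id] = 1        # child -> parent link
    child_ids[child, 1] = child_id  # parent -> child link
    child_ids[:, child_id] .= 0     # a leaf has no children
    levels[child_id] = levels[1] + 1
end
n_cells += 2^ndims_tree

@assert all(==(1), parent_ids[2:n_cells]) && levels[n_cells] == 1

The extra column at index `capacity + 1` in the real structs is the `dummy` slot used as temporary storage for swap operations, which is why every array above is allocated one entry longer than the capacity.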
# Create and initialize tree function ParallelTree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, - length::Real, periodicity=true) where NDIMS - # Create instance - t = ParallelTree{NDIMS}(capacity) + length::Real, periodicity = true) where {NDIMS} + # Create instance + t = ParallelTree{NDIMS}(capacity) - # Initialize root cell - init!(t, center, length, periodicity) + # Initialize root cell + init!(t, center, length, periodicity) - return t + return t end # Constructor accepting a single number as center (as opposed to an array) for 1D -ParallelTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = ParallelTree{1}(cap, [convert(Float64, center)], len, periodicity) - +function ParallelTree{1}(cap::Int, center::Real, len::Real, periodicity = true) + ParallelTree{1}(cap, [convert(Float64, center)], len, periodicity) +end # Clear tree with deleting data structures, store center and length, and create root cell -function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, periodicity=true) - clear!(t) - - # Set domain information - t.center_level_0 = center - t.length_level_0 = length - - # Create root cell - t.length += 1 - t.parent_ids[1] = 0 - t.child_ids[:, 1] .= 0 - t.levels[1] = 0 - set_cell_coordinates!(t, t.center_level_0, 1) - t.original_cell_ids[1] = 0 - t.mpi_ranks[1] = typemin(Int) - - # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor - if all(periodicity) - # Also catches case where periodicity = true - t.neighbor_ids[:, 1] .= 1 - t.periodicity = ntuple(x->true, ndims(t)) - elseif !any(periodicity) - # Also catches case where periodicity = false - t.neighbor_ids[:, 1] .= 0 - t.periodicity = ntuple(x->false, ndims(t)) - else - # Default case if periodicity is an iterable - for dimension in 1:ndims(t) - if periodicity[dimension] - t.neighbor_ids[2 * dimension - 1, 1] = 1 - t.neighbor_ids[2 * dimension - 0, 1] = 1 - else - t.neighbor_ids[2 * dimension - 1, 1] = 0 - t.neighbor_ids[2 * dimension - 0, 1] = 0 - end +function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, + periodicity = true) + clear!(t) + + # Set domain information + t.center_level_0 = center + t.length_level_0 = length + + # Create root cell + t.length += 1 + t.parent_ids[1] = 0 + t.child_ids[:, 1] .= 0 + t.levels[1] = 0 + set_cell_coordinates!(t, t.center_level_0, 1) + t.original_cell_ids[1] = 0 + t.mpi_ranks[1] = typemin(Int) + + # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor + if all(periodicity) + # Also catches case where periodicity = true + t.neighbor_ids[:, 1] .= 1 + t.periodicity = ntuple(x -> true, ndims(t)) + elseif !any(periodicity) + # Also catches case where periodicity = false + t.neighbor_ids[:, 1] .= 0 + t.periodicity = ntuple(x -> false, ndims(t)) + else + # Default case if periodicity is an iterable + for dimension in 1:ndims(t) + if periodicity[dimension] + t.neighbor_ids[2 * dimension - 1, 1] = 1 + t.neighbor_ids[2 * dimension - 0, 1] = 1 + else + t.neighbor_ids[2 * dimension - 1, 1] = 0 + t.neighbor_ids[2 * dimension - 0, 1] = 0 + end + end + + t.periodicity = Tuple(periodicity) end - - t.periodicity = Tuple(periodicity) - end end - # Convenience output for debugging function Base.show(io::IO, ::MIME"text/plain", t::ParallelTree) - @nospecialize t # reduce precompilation time - - l = t.length - println(io, '*'^20) - println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") - println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") - println(io, 
"transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") - println(io, "t.levels[1:l] = $(t.levels[1:l])") - println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") - println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") - println(io, "t.mpi_ranks[1:l] = $(t.mpi_ranks[1:l])") - println(io, "t.capacity = $(t.capacity)") - println(io, "t.length = $(t.length)") - println(io, "t.dummy = $(t.dummy)") - println(io, "t.center_level_0 = $(t.center_level_0)") - println(io, "t.length_level_0 = $(t.length_level_0)") - println(io, '*'^20) + @nospecialize t # reduce precompilation time + + l = t.length + println(io, '*'^20) + println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") + println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") + println(io, + "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") + println(io, "t.levels[1:l] = $(t.levels[1:l])") + println(io, + "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") + println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.mpi_ranks[1:l] = $(t.mpi_ranks[1:l])") + println(io, "t.capacity = $(t.capacity)") + println(io, "t.length = $(t.length)") + println(io, "t.dummy = $(t.dummy)") + println(io, "t.center_level_0 = $(t.center_level_0)") + println(io, "t.length_level_0 = $(t.length_level_0)") + println(io, '*'^20) end - # Check if cell is own cell, i.e., belongs to this MPI rank is_own_cell(t::ParallelTree, cell_id) = t.mpi_ranks[cell_id] == mpi_rank() - # Return an array with the ids of all leaf cells for a given rank -leaf_cells_by_rank(t::ParallelTree, rank) = filter_leaf_cells(t) do cell_id - t.mpi_ranks[cell_id] == rank - end +leaf_cells_by_rank(t::ParallelTree, rank) = + filter_leaf_cells(t) do cell_id + t.mpi_ranks[cell_id] == rank + end # Return an array with the ids of all local leaf cells local_leaf_cells(t::ParallelTree) = leaf_cells_by_rank(t, mpi_rank()) - # Set information for child cell `child_id` based on parent cell `cell_id` (except neighbors) function init_child!(t::ParallelTree, cell_id, child, child_id) - t.parent_ids[child_id] = cell_id - t.child_ids[child, cell_id] = child_id - t.child_ids[:, child_id] .= 0 - t.levels[child_id] = t.levels[cell_id] + 1 - set_cell_coordinates!(t, - child_coordinates(t, cell_coordinates(t, cell_id), length_at_cell(t, cell_id), child), child_id) - t.original_cell_ids[child_id] = 0 - t.mpi_ranks[child_id] = t.mpi_ranks[cell_id] - - return nothing + t.parent_ids[child_id] = cell_id + t.child_ids[child, cell_id] = child_id + t.child_ids[:, child_id] .= 0 + t.levels[child_id] = t.levels[cell_id] + 1 + set_cell_coordinates!(t, + child_coordinates(t, cell_coordinates(t, cell_id), + length_at_cell(t, cell_id), child), + child_id) + t.original_cell_ids[child_id] = 0 + t.mpi_ranks[child_id] = t.mpi_ranks[cell_id] + + return nothing end - # Reset range of cells to values that are prone to cause errors as soon as they are used. # # Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. 
function invalidate!(t::ParallelTree, first::Int, last::Int) - @assert first > 0 - @assert last <= t.capacity + 1 - - # Integer values are set to smallest negative value, floating point values to NaN - t.parent_ids[first:last] .= typemin(Int) - t.child_ids[:, first:last] .= typemin(Int) - t.neighbor_ids[:, first:last] .= typemin(Int) - t.levels[first:last] .= typemin(Int) - t.coordinates[:, first:last] .= NaN - t.original_cell_ids[first:last] .= typemin(Int) - t.mpi_ranks[first:last] .= typemin(Int) - - return nothing + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + t.mpi_ranks[first:last] .= typemin(Int) + + return nothing end - # Raw copy operation for ranges of cells. # # This method is used by the higher-level copy operations for AbstractContainer -function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last::Int, destination::Int) - copy_data!(target.parent_ids, source.parent_ids, first, last, destination) - copy_data!(target.child_ids, source.child_ids, first, last, destination, - n_children_per_cell(target)) - copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, - destination, n_directions(target)) - copy_data!(target.levels, source.levels, first, last, destination) - copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) - copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) - copy_data!(target.mpi_ranks, source.mpi_ranks, first, last, destination) +function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last::Int, + destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, + ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, + destination) + copy_data!(target.mpi_ranks, source.mpi_ranks, first, last, destination) end - # Reset data structures by recreating all internal storage containers and invalidating all elements -function reset_data_structures!(t::ParallelTree{NDIMS}) where NDIMS - t.parent_ids = Vector{Int}(undef, t.capacity + 1) - t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) - t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) - t.levels = Vector{Int}(undef, t.capacity + 1) - t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) - t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) - t.mpi_ranks = Vector{Int}(undef, t.capacity + 1) - - invalidate!(t, 1, capacity(t) + 1) +function reset_data_structures!(t::ParallelTree{NDIMS}) where {NDIMS} + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2 * NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + 
t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + t.mpi_ranks = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) end - - end # @muladd diff --git a/src/meshes/parallel_tree_mesh.jl b/src/meshes/parallel_tree_mesh.jl index 0bad9befedf..050e419680c 100644 --- a/src/meshes/parallel_tree_mesh.jl +++ b/src/meshes/parallel_tree_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ partition!(mesh::ParallelTreeMesh, allow_coarsening=true) @@ -13,91 +13,91 @@ based on leaf cell count and tree structure. If `allow_coarsening` is `true`, the algorithm will keep leaf cells together on one rank when needed for local coarsening (i.e. when all children of a cell are leaves). """ -function partition!(mesh::ParallelTreeMesh; allow_coarsening=true) - # Determine number of leaf cells per rank - leaves = leaf_cells(mesh.tree) - @assert length(leaves) > mpi_nranks() "Too many ranks to properly partition the mesh!" - n_leaves_per_rank = OffsetArray(fill(div(length(leaves), mpi_nranks()), mpi_nranks()), - 0:(mpi_nranks() - 1)) - for d in 0:(rem(length(leaves), mpi_nranks()) - 1) - n_leaves_per_rank[d] += 1 - end - @assert sum(n_leaves_per_rank) == length(leaves) - - # Assign MPI ranks to all cells such that all ancestors of each cell - if not yet assigned to a - # rank - belong to the same rank - mesh.first_cell_by_rank = similar(n_leaves_per_rank) - mesh.n_cells_by_rank = similar(n_leaves_per_rank) - - leaf_count = 0 - mesh.first_cell_by_rank[0] = 1 - # Iterate over all ranks - for d in 0:(mpi_nranks() - 1) - leaf_count += n_leaves_per_rank[d] - last_id = leaves[leaf_count] - parent_id = mesh.tree.parent_ids[last_id] - - # Check if all children of the last parent are leaves - if allow_coarsening && - all(id -> is_leaf(mesh.tree, id), @view mesh.tree.child_ids[:, parent_id]) && - d < length(n_leaves_per_rank) - 1 - - # To keep children of parent together if they are all leaves, - # all children are added to this rank - additional_cells = (last_id+1):mesh.tree.child_ids[end, parent_id] - if length(additional_cells) > 0 - last_id = additional_cells[end] - - additional_leaves = count(id -> is_leaf(mesh.tree, id), additional_cells) - leaf_count += additional_leaves - # Add leaves to this rank, remove from next rank - n_leaves_per_rank[d] += additional_leaves - n_leaves_per_rank[d+1] -= additional_leaves - end +function partition!(mesh::ParallelTreeMesh; allow_coarsening = true) + # Determine number of leaf cells per rank + leaves = leaf_cells(mesh.tree) + @assert length(leaves)>mpi_nranks() "Too many ranks to properly partition the mesh!" + n_leaves_per_rank = OffsetArray(fill(div(length(leaves), mpi_nranks()), + mpi_nranks()), + 0:(mpi_nranks() - 1)) + for d in 0:(rem(length(leaves), mpi_nranks()) - 1) + n_leaves_per_rank[d] += 1 end - - @assert all(n -> n > 0, n_leaves_per_rank) "Too many ranks to properly partition the mesh!" 
- - mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 - mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d - - # Set first cell of next rank - if d < length(n_leaves_per_rank) - 1 - mesh.first_cell_by_rank[d+1] = mesh.first_cell_by_rank[d] + mesh.n_cells_by_rank[d] + @assert sum(n_leaves_per_rank) == length(leaves) + + # Assign MPI ranks to all cells such that all ancestors of each cell - if not yet assigned to a + # rank - belong to the same rank + mesh.first_cell_by_rank = similar(n_leaves_per_rank) + mesh.n_cells_by_rank = similar(n_leaves_per_rank) + + leaf_count = 0 + mesh.first_cell_by_rank[0] = 1 + # Iterate over all ranks + for d in 0:(mpi_nranks() - 1) + leaf_count += n_leaves_per_rank[d] + last_id = leaves[leaf_count] + parent_id = mesh.tree.parent_ids[last_id] + + # Check if all children of the last parent are leaves + if allow_coarsening && + all(id -> is_leaf(mesh.tree, id), @view mesh.tree.child_ids[:, parent_id]) && + d < length(n_leaves_per_rank) - 1 + + # To keep children of parent together if they are all leaves, + # all children are added to this rank + additional_cells = (last_id + 1):mesh.tree.child_ids[end, parent_id] + if length(additional_cells) > 0 + last_id = additional_cells[end] + + additional_leaves = count(id -> is_leaf(mesh.tree, id), + additional_cells) + leaf_count += additional_leaves + # Add leaves to this rank, remove from next rank + n_leaves_per_rank[d] += additional_leaves + n_leaves_per_rank[d + 1] -= additional_leaves + end + end + + @assert all(n -> n > 0, n_leaves_per_rank) "Too many ranks to properly partition the mesh!" + + mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 + mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d + + # Set first cell of next rank + if d < length(n_leaves_per_rank) - 1 + mesh.first_cell_by_rank[d + 1] = mesh.first_cell_by_rank[d] + + mesh.n_cells_by_rank[d] + end end - end - @assert all(x->x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) - @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) + @assert all(x -> x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) + @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) - return nothing + return nothing end - function get_restart_mesh_filename(restart_filename, mpi_parallel::True) - # Get directory name - dirname, _ = splitdir(restart_filename) - - if mpi_isroot() - # Read mesh filename from restart file - mesh_file = "" - h5open(restart_filename, "r") do file - mesh_file = read(attributes(file)["mesh_file"]) + # Get directory name + dirname, _ = splitdir(restart_filename) + + if mpi_isroot() + # Read mesh filename from restart file + mesh_file = "" + h5open(restart_filename, "r") do file + mesh_file = read(attributes(file)["mesh_file"]) + end + + buffer = Vector{UInt8}(mesh_file) + MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + else # non-root ranks + count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, count[]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + mesh_file = String(buffer) end - buffer = Vector{UInt8}(mesh_file) - MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - else # non-root ranks - count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) - buffer = Vector{UInt8}(undef, count[]) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - mesh_file = String(buffer) - end - - # Construct and return filename - return joinpath(dirname, mesh_file) + # Construct and return filename + 
return joinpath(dirname, mesh_file) end - - end # @muladd diff --git a/src/meshes/serial_tree.jl b/src/meshes/serial_tree.jl index a6d9eff37fc..143ac19f6ee 100644 --- a/src/meshes/serial_tree.jl +++ b/src/meshes/serial_tree.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Composite type that represents an NDIMS-dimensional tree (serial version). # @@ -26,190 +26,191 @@ # way. Also, depth-first ordering *might* not be guaranteed during # refinement/coarsening operations. mutable struct SerialTree{NDIMS} <: AbstractTree{NDIMS} - parent_ids::Vector{Int} - child_ids::Matrix{Int} - neighbor_ids::Matrix{Int} - levels::Vector{Int} - coordinates::Matrix{Float64} - original_cell_ids::Vector{Int} - - capacity::Int - length::Int - dummy::Int - - center_level_0::SVector{NDIMS, Float64} - length_level_0::Float64 - periodicity::NTuple{NDIMS, Bool} - - function SerialTree{NDIMS}(capacity::Integer) where NDIMS - # Verify that NDIMS is an integer - @assert NDIMS isa Integer - - # Create instance - t = new() - - # Initialize fields with defaults - # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations - t.parent_ids = fill(typemin(Int), capacity + 1) - t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) - t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) - t.levels = fill(typemin(Int), capacity + 1) - t.coordinates = fill(NaN, NDIMS, capacity + 1) - t.original_cell_ids = fill(typemin(Int), capacity + 1) - - t.capacity = capacity - t.length = 0 - t.dummy = capacity + 1 - - t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS)) - t.length_level_0 = NaN - - return t - end + parent_ids::Vector{Int} + child_ids::Matrix{Int} + neighbor_ids::Matrix{Int} + levels::Vector{Int} + coordinates::Matrix{Float64} + original_cell_ids::Vector{Int} + + capacity::Int + length::Int + dummy::Int + + center_level_0::SVector{NDIMS, Float64} + length_level_0::Float64 + periodicity::NTuple{NDIMS, Bool} + + function SerialTree{NDIMS}(capacity::Integer) where {NDIMS} + # Verify that NDIMS is an integer + @assert NDIMS isa Integer + + # Create instance + t = new() + + # Initialize fields with defaults + # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations + t.parent_ids = fill(typemin(Int), capacity + 1) + t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) + t.neighbor_ids = fill(typemin(Int), 2 * NDIMS, capacity + 1) + t.levels = fill(typemin(Int), capacity + 1) + t.coordinates = fill(NaN, NDIMS, capacity + 1) + t.original_cell_ids = fill(typemin(Int), capacity + 1) + + t.capacity = capacity + t.length = 0 + t.dummy = capacity + 1 + + t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS)) + t.length_level_0 = NaN + + return t + end end - # Constructor for passing the dimension as an argument -SerialTree(::Val{NDIMS}, args...) where NDIMS = SerialTree{NDIMS}(args...) +SerialTree(::Val{NDIMS}, args...) where {NDIMS} = SerialTree{NDIMS}(args...)
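The leaf balancing at the heart of `partition!` (parallel_tree_mesh.jl above) is easiest to see with concrete numbers. The following is a minimal, self-contained sketch of the same div/rem distribution; the function name `distribute_leaves` and the plain 1-based `Vector` (instead of the 0-based `OffsetArray` over MPI ranks used in the actual implementation) are illustrative assumptions, not part of the diff.

# Spread n_leaves leaf cells over n_ranks as evenly as possible: every rank
# gets div(n_leaves, n_ranks) leaves, and the first rem(n_leaves, n_ranks)
# ranks absorb one extra leaf each (illustrative sketch only).
function distribute_leaves(n_leaves, n_ranks)
    n_leaves_per_rank = fill(div(n_leaves, n_ranks), n_ranks)
    for d in 1:rem(n_leaves, n_ranks)
        n_leaves_per_rank[d] += 1
    end
    @assert sum(n_leaves_per_rank) == n_leaves
    return n_leaves_per_rank
end

distribute_leaves(10, 4)  # yields [3, 3, 2, 2]

On top of this distribution, the actual `partition!` walks the leaves in depth-first order and, when `allow_coarsening` is set, shifts whole sibling groups onto one rank so that local coarsening remains possible.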
# Create and initialize tree function SerialTree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, - length::Real, periodicity=true) where NDIMS - # Create instance - t = SerialTree{NDIMS}(capacity) + length::Real, periodicity = true) where {NDIMS} + # Create instance + t = SerialTree{NDIMS}(capacity) - # Initialize root cell - init!(t, center, length, periodicity) + # Initialize root cell + init!(t, center, length, periodicity) - return t + return t end # Constructor accepting a single number as center (as opposed to an array) for 1D -SerialTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = SerialTree{1}(cap, [convert(Float64, center)], len, periodicity) - +function SerialTree{1}(cap::Int, center::Real, len::Real, periodicity = true) + SerialTree{1}(cap, [convert(Float64, center)], len, periodicity) +end # Clear tree by deleting data structures, store center and length, and create root cell -function init!(t::SerialTree, center::AbstractArray{Float64}, length::Real, periodicity=true) - clear!(t) - - # Set domain information - t.center_level_0 = center - t.length_level_0 = length - - # Create root cell - t.length += 1 - t.parent_ids[1] = 0 - t.child_ids[:, 1] .= 0 - t.levels[1] = 0 - set_cell_coordinates!(t, t.center_level_0, 1) - t.original_cell_ids[1] = 0 - - # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor - if all(periodicity) - # Also catches case where periodicity = true - t.neighbor_ids[:, 1] .= 1 - t.periodicity = ntuple(x->true, ndims(t)) - elseif !any(periodicity) - # Also catches case where periodicity = false - t.neighbor_ids[:, 1] .= 0 - t.periodicity = ntuple(x->false, ndims(t)) - else - # Default case if periodicity is an iterable - for dimension in 1:ndims(t) - if periodicity[dimension] - t.neighbor_ids[2 * dimension - 1, 1] = 1 - t.neighbor_ids[2 * dimension - 0, 1] = 1 - else - t.neighbor_ids[2 * dimension - 1, 1] = 0 - t.neighbor_ids[2 * dimension - 0, 1] = 0 - end +function init!(t::SerialTree, center::AbstractArray{Float64}, length::Real, + periodicity = true) + clear!(t) + + # Set domain information + t.center_level_0 = center + t.length_level_0 = length + + # Create root cell + t.length += 1 + t.parent_ids[1] = 0 + t.child_ids[:, 1] .= 0 + t.levels[1] = 0 + set_cell_coordinates!(t, t.center_level_0, 1) + t.original_cell_ids[1] = 0 + + # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor + if all(periodicity) + # Also catches case where periodicity = true + t.neighbor_ids[:, 1] .= 1 + t.periodicity = ntuple(x -> true, ndims(t)) + elseif !any(periodicity) + # Also catches case where periodicity = false + t.neighbor_ids[:, 1] .= 0 + t.periodicity = ntuple(x -> false, ndims(t)) + else + # Default case if periodicity is an iterable + for dimension in 1:ndims(t) + if periodicity[dimension] + t.neighbor_ids[2 * dimension - 1, 1] = 1 + t.neighbor_ids[2 * dimension - 0, 1] = 1 + else + t.neighbor_ids[2 * dimension - 1, 1] = 0 + t.neighbor_ids[2 * dimension - 0, 1] = 0 + end + end + + t.periodicity = Tuple(periodicity) end end - - t.periodicity = Tuple(periodicity) - end end - # Convenience output for debugging function Base.show(io::IO, ::MIME"text/plain", t::SerialTree) - @nospecialize t # reduce precompilation time - - l = t.length - println(io, '*'^20) - println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") - println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") - println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") -
println(io, "t.levels[1:l] = $(t.levels[1:l])") - println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") - println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") - println(io, "t.capacity = $(t.capacity)") - println(io, "t.length = $(t.length)") - println(io, "t.dummy = $(t.dummy)") - println(io, "t.center_level_0 = $(t.center_level_0)") - println(io, "t.length_level_0 = $(t.length_level_0)") - println(io, '*'^20) + @nospecialize t # reduce precompilation time + + l = t.length + println(io, '*'^20) + println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") + println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") + println(io, + "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") + println(io, "t.levels[1:l] = $(t.levels[1:l])") + println(io, + "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") + println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.capacity = $(t.capacity)") + println(io, "t.length = $(t.length)") + println(io, "t.dummy = $(t.dummy)") + println(io, "t.center_level_0 = $(t.center_level_0)") + println(io, "t.length_level_0 = $(t.length_level_0)") + println(io, '*'^20) end - # Set information for child cell `child_id` based on parent cell `cell_id` (except neighbors) function init_child!(t::SerialTree, cell_id, child, child_id) - t.parent_ids[child_id] = cell_id - t.child_ids[child, cell_id] = child_id - t.child_ids[:, child_id] .= 0 - t.levels[child_id] = t.levels[cell_id] + 1 - set_cell_coordinates!(t, - child_coordinates(t, cell_coordinates(t, cell_id), length_at_cell(t, cell_id), child), child_id) - t.original_cell_ids[child_id] = 0 - - return nothing + t.parent_ids[child_id] = cell_id + t.child_ids[child, cell_id] = child_id + t.child_ids[:, child_id] .= 0 + t.levels[child_id] = t.levels[cell_id] + 1 + set_cell_coordinates!(t, + child_coordinates(t, cell_coordinates(t, cell_id), + length_at_cell(t, cell_id), child), + child_id) + t.original_cell_ids[child_id] = 0 + + return nothing end - # Reset range of cells to values that are prone to cause errors as soon as they are used. # # Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. function invalidate!(t::SerialTree, first::Int, last::Int) - @assert first > 0 - @assert last <= t.capacity + 1 - - # Integer values are set to smallest negative value, floating point values to NaN - t.parent_ids[first:last] .= typemin(Int) - t.child_ids[:, first:last] .= typemin(Int) - t.neighbor_ids[:, first:last] .= typemin(Int) - t.levels[first:last] .= typemin(Int) - t.coordinates[:, first:last] .= NaN - t.original_cell_ids[first:last] .= typemin(Int) - - return nothing + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + + return nothing end - # Raw copy operation for ranges of cells. 
# # This method is used by the higher-level copy operations for AbstractContainer -function raw_copy!(target::SerialTree, source::SerialTree, first::Int, last::Int, destination::Int) - copy_data!(target.parent_ids, source.parent_ids, first, last, destination) - copy_data!(target.child_ids, source.child_ids, first, last, destination, - n_children_per_cell(target)) - copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, - destination, n_directions(target)) - copy_data!(target.levels, source.levels, first, last, destination) - copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) - copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) +function raw_copy!(target::SerialTree, source::SerialTree, first::Int, last::Int, + destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, + ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, + destination) end - # Reset data structures by recreating all internal storage containers and invalidating all elements -function reset_data_structures!(t::SerialTree{NDIMS}) where NDIMS - t.parent_ids = Vector{Int}(undef, t.capacity + 1) - t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) - t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) - t.levels = Vector{Int}(undef, t.capacity + 1) - t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) - t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) - - invalidate!(t, 1, capacity(t) + 1) +function reset_data_structures!(t::SerialTree{NDIMS}) where {NDIMS} + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2 * NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) end - - end # @muladd diff --git a/src/meshes/structured_mesh.jl b/src/meshes/structured_mesh.jl index 32c4b6cc459..5872681933a 100644 --- a/src/meshes/structured_mesh.jl +++ b/src/meshes/structured_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ StructuredMesh{NDIMS} <: AbstractMesh{NDIMS} @@ -13,16 +13,15 @@ A structured curved mesh. Different numbers of cells per dimension are possible and arbitrary functions can be used as domain faces. 
""" -mutable struct StructuredMesh{NDIMS, RealT<:Real} <: AbstractMesh{NDIMS} - cells_per_dimension::NTuple{NDIMS, Int} - mapping::Any # Not relevant for performance - mapping_as_string::String - periodicity::NTuple{NDIMS, Bool} - current_filename::String - unsaved_changes::Bool +mutable struct StructuredMesh{NDIMS, RealT <: Real} <: AbstractMesh{NDIMS} + cells_per_dimension::NTuple{NDIMS, Int} + mapping::Any # Not relevant for performance + mapping_as_string::String + periodicity::NTuple{NDIMS, Bool} + current_filename::String + unsaved_changes::Bool end - """ StructuredMesh(cells_per_dimension, mapping; RealT=Float64, unsaved_changes=true, mapping_as_string=mapping2string(mapping, length(cells_per_dimension))) @@ -44,25 +43,28 @@ Create a StructuredMesh of the given size and shape that uses `RealT` as coordin The code string must define the mapping function with the name `mapping`. This will be changed in the future, see https://github.com/trixi-framework/Trixi.jl/issues/541. """ -function StructuredMesh(cells_per_dimension, mapping; RealT=Float64, periodicity=true, unsaved_changes=true, - mapping_as_string=mapping2string(mapping, length(cells_per_dimension))) - NDIMS = length(cells_per_dimension) - - # Convert periodicity to a Tuple of a Bool for every dimension - if all(periodicity) - # Also catches case where periodicity = true - periodicity = ntuple(_->true, NDIMS) - elseif !any(periodicity) - # Also catches case where periodicity = false - periodicity = ntuple(_->false, NDIMS) - else - # Default case if periodicity is an iterable - periodicity = Tuple(periodicity) - end - - return StructuredMesh{NDIMS, RealT}(Tuple(cells_per_dimension), mapping, mapping_as_string, periodicity, "", unsaved_changes) -end +function StructuredMesh(cells_per_dimension, mapping; RealT = Float64, + periodicity = true, unsaved_changes = true, + mapping_as_string = mapping2string(mapping, + length(cells_per_dimension))) + NDIMS = length(cells_per_dimension) + + # Convert periodicity to a Tuple of a Bool for every dimension + if all(periodicity) + # Also catches case where periodicity = true + periodicity = ntuple(_ -> true, NDIMS) + elseif !any(periodicity) + # Also catches case where periodicity = false + periodicity = ntuple(_ -> false, NDIMS) + else + # Default case if periodicity is an iterable + periodicity = Tuple(periodicity) + end + return StructuredMesh{NDIMS, RealT}(Tuple(cells_per_dimension), mapping, + mapping_as_string, periodicity, "", + unsaved_changes) +end """ StructuredMesh(cells_per_dimension, faces; RealT=Float64, unsaved_changes=true, faces_as_string=faces2string(faces)) @@ -83,28 +85,30 @@ Create a StructuredMesh of the given size and shape that uses `RealT` as coordin - `periodicity`: either a `Bool` deciding if all of the boundaries are periodic or an `NTuple{NDIMS, Bool}` deciding for each dimension if the boundaries in this dimension are periodic. 
""" -function StructuredMesh(cells_per_dimension, faces::Tuple; RealT=Float64, periodicity=true) - NDIMS = length(cells_per_dimension) +function StructuredMesh(cells_per_dimension, faces::Tuple; RealT = Float64, + periodicity = true) + NDIMS = length(cells_per_dimension) - validate_faces(faces) + validate_faces(faces) - # Use the transfinite mapping with the correct number of arguments - mapping = transfinite_mapping(faces) + # Use the transfinite mapping with the correct number of arguments + mapping = transfinite_mapping(faces) - # Collect definitions of face functions in one string (separated by semicolons) - face2substring(face) = code_string(face, ntuple(_ -> Float64, NDIMS-1)) - join_semicolon(strings) = join(strings, "; ") + # Collect definitions of face functions in one string (separated by semicolons) + face2substring(face) = code_string(face, ntuple(_ -> Float64, NDIMS - 1)) + join_semicolon(strings) = join(strings, "; ") - faces_definition = faces .|> face2substring .|> string |> join_semicolon + faces_definition = faces .|> face2substring .|> string |> join_semicolon - # Include faces definition in `mapping_as_string` to allow for evaluation - # without knowing the face functions - mapping_as_string = "$faces_definition; faces = $(string(faces)); mapping = transfinite_mapping(faces)" + # Include faces definition in `mapping_as_string` to allow for evaluation + # without knowing the face functions + mapping_as_string = "$faces_definition; faces = $(string(faces)); mapping = transfinite_mapping(faces)" - return StructuredMesh(cells_per_dimension, mapping; RealT=RealT, periodicity=periodicity, mapping_as_string=mapping_as_string) + return StructuredMesh(cells_per_dimension, mapping; RealT = RealT, + periodicity = periodicity, + mapping_as_string = mapping_as_string) end - """ StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max; periodicity=true) @@ -117,20 +121,24 @@ Create a StructuredMesh that represents a uncurved structured mesh with a rectan - `periodicity`: either a `Bool` deciding if all of the boundaries are periodic or an `NTuple{NDIMS, Bool}` deciding for each dimension if the boundaries in this dimension are periodic. 
""" -function StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max; periodicity=true) - NDIMS = length(cells_per_dimension) - RealT = promote_type(eltype(coordinates_min), eltype(coordinates_max)) - - mapping = coordinates2mapping(coordinates_min, coordinates_max) - mapping_as_string = "coordinates_min = $coordinates_min; " * - "coordinates_max = $coordinates_max; " * - "mapping = coordinates2mapping(coordinates_min, coordinates_max)" - return StructuredMesh(cells_per_dimension, mapping; RealT=RealT, periodicity=periodicity, mapping_as_string=mapping_as_string) +function StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max; + periodicity = true) + NDIMS = length(cells_per_dimension) + RealT = promote_type(eltype(coordinates_min), eltype(coordinates_max)) + + mapping = coordinates2mapping(coordinates_min, coordinates_max) + mapping_as_string = "coordinates_min = $coordinates_min; " * + "coordinates_max = $coordinates_max; " * + "mapping = coordinates2mapping(coordinates_min, coordinates_max)" + return StructuredMesh(cells_per_dimension, mapping; RealT = RealT, + periodicity = periodicity, + mapping_as_string = mapping_as_string) end - # Extract a string of the code that defines the mapping function -mapping2string(mapping, ndims) = string(code_string(mapping, ntuple(_ -> Float64, ndims))) +function mapping2string(mapping, ndims) + string(code_string(mapping, ntuple(_ -> Float64, ndims))) +end # An internal function wrapping `CodeTracking.code_string` with additional # error checking to avoid some problems when calling this function in @@ -138,171 +146,175 @@ mapping2string(mapping, ndims) = string(code_string(mapping, ntuple(_ -> Float64 # - https://github.com/trixi-framework/Trixi.jl/issues/931 # - https://github.com/trixi-framework/Trixi.jl/pull/1084 function code_string(f, t) - try - return CodeTracking.code_string(f, t) - catch e - return "" - end + try + return CodeTracking.code_string(f, t) + catch e + return "" + end end # Interpolate linearly between left and right value where s should be between -1 and 1 -linear_interpolate(s, left_value, right_value) = 0.5 * ((1 - s) * left_value + (1 + s) * right_value) - +function linear_interpolate(s, left_value, right_value) + 0.5 * ((1 - s) * left_value + (1 + s) * right_value) +end # Convert min and max coordinates of a rectangle to the corresponding transformation mapping function coordinates2mapping(coordinates_min::NTuple{1}, coordinates_max::NTuple{1}) - mapping(xi) = linear_interpolate(xi, coordinates_min[1], coordinates_max[1]) + mapping(xi) = linear_interpolate(xi, coordinates_min[1], coordinates_max[1]) end function coordinates2mapping(coordinates_min::NTuple{2}, coordinates_max::NTuple{2}) - mapping(xi, eta) = SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), - linear_interpolate(eta, coordinates_min[2], coordinates_max[2])) + function mapping(xi, eta) + SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), + linear_interpolate(eta, coordinates_min[2], coordinates_max[2])) + end end function coordinates2mapping(coordinates_min::NTuple{3}, coordinates_max::NTuple{3}) - mapping(xi, eta, zeta) = SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), - linear_interpolate(eta, coordinates_min[2], coordinates_max[2]), - linear_interpolate(zeta, coordinates_min[3], coordinates_max[3])) + function mapping(xi, eta, zeta) + SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), + linear_interpolate(eta, coordinates_min[2], 
coordinates_max[2]), + linear_interpolate(zeta, coordinates_min[3], coordinates_max[3])) + end end - # In 1D # Linear mapping from the reference element to the domain described by the faces function linear_mapping(x, faces) - return linear_interpolate(x, faces[1](), faces[2]()) + return linear_interpolate(x, faces[1](), faces[2]()) end - # In 2D # Bilinear mapping from the reference element to the domain described by the faces function bilinear_mapping(x, y, faces) - x1 = faces[1](-1) # Bottom left - x2 = faces[2](-1) # Bottom right - x3 = faces[1](1) # Top left - x4 = faces[2](1) # Top right - - return 0.25 * (x1 * (1 - x) * (1 - y) + - x2 * (1 + x) * (1 - y) + - x3 * (1 - x) * (1 + y) + - x4 * (1 + x) * (1 + y)) + x1 = faces[1](-1) # Bottom left + x2 = faces[2](-1) # Bottom right + x3 = faces[1](1) # Top left + x4 = faces[2](1) # Top right + + return 0.25 * (x1 * (1 - x) * (1 - y) + + x2 * (1 + x) * (1 - y) + + x3 * (1 - x) * (1 + y) + + x4 * (1 + x) * (1 + y)) end - # In 3D # Trilinear mapping from the reference element to the domain described by the faces function trilinear_mapping(x, y, z, faces) - x1 = faces[1](-1, -1) # mapped from (-1,-1,-1) - x2 = faces[2](-1, -1) # mapped from ( 1,-1,-1) - x3 = faces[1]( 1, -1) # mapped from (-1, 1,-1) - x4 = faces[2]( 1, -1) # mapped from ( 1, 1,-1) - x5 = faces[1](-1, 1) # mapped from (-1,-1, 1) - x6 = faces[2](-1, 1) # mapped from ( 1,-1, 1) - x7 = faces[1]( 1, 1) # mapped from (-1, 1, 1) - x8 = faces[2]( 1, 1) # mapped from ( 1, 1, 1) - - return 0.125 * (x1 * (1 - x) * (1 - y) * (1 - z) + - x2 * (1 + x) * (1 - y) * (1 - z) + - x3 * (1 - x) * (1 + y) * (1 - z) + - x4 * (1 + x) * (1 + y) * (1 - z) + - x5 * (1 - x) * (1 - y) * (1 + z) + - x6 * (1 + x) * (1 - y) * (1 + z) + - x7 * (1 - x) * (1 + y) * (1 + z) + - x8 * (1 + x) * (1 + y) * (1 + z) ) + x1 = faces[1](-1, -1) # mapped from (-1,-1,-1) + x2 = faces[2](-1, -1) # mapped from ( 1,-1,-1) + x3 = faces[1](1, -1) # mapped from (-1, 1,-1) + x4 = faces[2](1, -1) # mapped from ( 1, 1,-1) + x5 = faces[1](-1, 1) # mapped from (-1,-1, 1) + x6 = faces[2](-1, 1) # mapped from ( 1,-1, 1) + x7 = faces[1](1, 1) # mapped from (-1, 1, 1) + x8 = faces[2](1, 1) # mapped from ( 1, 1, 1) + + return 0.125 * (x1 * (1 - x) * (1 - y) * (1 - z) + + x2 * (1 + x) * (1 - y) * (1 - z) + + x3 * (1 - x) * (1 + y) * (1 - z) + + x4 * (1 + x) * (1 + y) * (1 - z) + + x5 * (1 - x) * (1 - y) * (1 + z) + + x6 * (1 + x) * (1 - y) * (1 + z) + + x7 * (1 - x) * (1 + y) * (1 + z) + + x8 * (1 + x) * (1 + y) * (1 + z)) end - # Use linear mapping in 1D transfinite_mapping(faces::NTuple{2, Any}) = x -> linear_mapping(x, faces) # In 2D # Transfinite mapping from the reference element to the domain described by the faces function transfinite_mapping(faces::NTuple{4, Any}) - mapping(x, y) = (linear_interpolate(x, faces[1](y), faces[2](y)) + - linear_interpolate(y, faces[3](x), faces[4](x)) - - bilinear_mapping(x, y, faces)) + function mapping(x, y) + (linear_interpolate(x, faces[1](y), faces[2](y)) + + linear_interpolate(y, faces[3](x), faces[4](x)) - + bilinear_mapping(x, y, faces)) + end end - # In 3D # Correction term for the Transfinite mapping function correction_term_3d(x, y, z, faces) - # Correction for x-terms - c_x = linear_interpolate(x, linear_interpolate(y, faces[3](-1, z), faces[4](-1, z)) + - linear_interpolate(z, faces[5](-1, y), faces[6](-1, y)), - linear_interpolate(y, faces[3]( 1, z), faces[4]( 1, z)) + - linear_interpolate(z, faces[5]( 1, y), faces[6]( 1, y)) ) - - # Correction for y-terms - c_y = linear_interpolate(y, 
linear_interpolate(x, faces[1](-1, z), faces[2](-1, z)) + - linear_interpolate(z, faces[5]( x, -1), faces[6]( x, -1)), - linear_interpolate(x, faces[1]( 1, z), faces[2]( 1, z)) + - linear_interpolate(z, faces[5]( x, 1), faces[6]( x, 1)) ) - - # Correction for z-terms - c_z = linear_interpolate(z, linear_interpolate(x, faces[1](y, -1), faces[2](y, -1)) + - linear_interpolate(y, faces[3](x, -1), faces[4](x, -1)), - linear_interpolate(x, faces[1](y, 1), faces[2](y, 1)) + - linear_interpolate(y, faces[3](x, 1), faces[4](x, 1)) ) - - return 0.5 * (c_x + c_y + c_z) + # Correction for x-terms + c_x = linear_interpolate(x, + linear_interpolate(y, faces[3](-1, z), faces[4](-1, z)) + + linear_interpolate(z, faces[5](-1, y), faces[6](-1, y)), + linear_interpolate(y, faces[3](1, z), faces[4](1, z)) + + linear_interpolate(z, faces[5](1, y), faces[6](1, y))) + + # Correction for y-terms + c_y = linear_interpolate(y, + linear_interpolate(x, faces[1](-1, z), faces[2](-1, z)) + + linear_interpolate(z, faces[5](x, -1), faces[6](x, -1)), + linear_interpolate(x, faces[1](1, z), faces[2](1, z)) + + linear_interpolate(z, faces[5](x, 1), faces[6](x, 1))) + + # Correction for z-terms + c_z = linear_interpolate(z, + linear_interpolate(x, faces[1](y, -1), faces[2](y, -1)) + + linear_interpolate(y, faces[3](x, -1), faces[4](x, -1)), + linear_interpolate(x, faces[1](y, 1), faces[2](y, 1)) + + linear_interpolate(y, faces[3](x, 1), faces[4](x, 1))) + + return 0.5 * (c_x + c_y + c_z) end - # In 3D # Transfinite mapping from the reference element to the domain described by the faces function transfinite_mapping(faces::NTuple{6, Any}) - mapping(x, y, z) = (linear_interpolate(x, faces[1](y, z), faces[2](y, z)) + - linear_interpolate(y, faces[3](x, z), faces[4](x, z)) + - linear_interpolate(z, faces[5](x, y), faces[6](x, y)) - - correction_term_3d(x, y, z, faces) + - trilinear_mapping(x, y, z, faces)) + function mapping(x, y, z) + (linear_interpolate(x, faces[1](y, z), faces[2](y, z)) + + linear_interpolate(y, faces[3](x, z), faces[4](x, z)) + + linear_interpolate(z, faces[5](x, y), faces[6](x, y)) - + correction_term_3d(x, y, z, faces) + + trilinear_mapping(x, y, z, faces)) + end end - function validate_faces(faces::NTuple{2, Any}) end function validate_faces(faces::NTuple{4, Any}) - @assert faces[1](-1) ≈ faces[3](-1) "faces[1](-1) needs to match faces[3](-1) (bottom left corner)" - @assert faces[2](-1) ≈ faces[3]( 1) "faces[2](-1) needs to match faces[3](1) (bottom right corner)" - @assert faces[1]( 1) ≈ faces[4](-1) "faces[1](1) needs to match faces[4](-1) (top left corner)" - @assert faces[2]( 1) ≈ faces[4]( 1) "faces[2](1) needs to match faces[4](1) (top right corner)" + @assert faces[1](-1)≈faces[3](-1) "faces[1](-1) needs to match faces[3](-1) (bottom left corner)" + @assert faces[2](-1)≈faces[3](1) "faces[2](-1) needs to match faces[3](1) (bottom right corner)" + @assert faces[1](1)≈faces[4](-1) "faces[1](1) needs to match faces[4](-1) (top left corner)" + @assert faces[2](1)≈faces[4](1) "faces[2](1) needs to match faces[4](1) (top right corner)" end function validate_faces(faces::NTuple{6, Any}) - @assert (faces[1](-1, -1) ≈ - faces[3](-1, -1) ≈ - faces[5](-1, -1)) "faces[1](-1, -1), faces[3](-1, -1) and faces[5](-1, -1) need to match at (-1, -1, -1) corner" + @assert (faces[1](-1, -1)≈ + faces[3](-1, -1)≈ + faces[5](-1, -1)) "faces[1](-1, -1), faces[3](-1, -1) and faces[5](-1, -1) need to match at (-1, -1, -1) corner" - @assert (faces[2](-1, -1) ≈ - faces[3]( 1, -1) ≈ - faces[5]( 1, -1)) "faces[2](-1, -1), 
faces[3](1, -1) and faces[5](1, -1) need to match at (1, -1, -1) corner" + @assert (faces[2](-1, -1)≈ + faces[3](1, -1)≈ + faces[5](1, -1)) "faces[2](-1, -1), faces[3](1, -1) and faces[5](1, -1) need to match at (1, -1, -1) corner" - @assert (faces[1]( 1, -1) ≈ - faces[4](-1, -1) ≈ - faces[5](-1, 1)) "faces[1](1, -1), faces[4](-1, -1) and faces[5](-1, 1) need to match at (-1, 1, -1) corner" + @assert (faces[1](1, -1)≈ + faces[4](-1, -1)≈ + faces[5](-1, 1)) "faces[1](1, -1), faces[4](-1, -1) and faces[5](-1, 1) need to match at (-1, 1, -1) corner" - @assert (faces[2]( 1, -1) ≈ - faces[4]( 1, -1) ≈ - faces[5]( 1, 1)) "faces[2](1, -1), faces[4](1, -1) and faces[5](1, 1) need to match at (1, 1, -1) corner" + @assert (faces[2](1, -1)≈ + faces[4](1, -1)≈ + faces[5](1, 1)) "faces[2](1, -1), faces[4](1, -1) and faces[5](1, 1) need to match at (1, 1, -1) corner" - @assert (faces[1](-1, 1) ≈ - faces[3](-1, 1) ≈ - faces[6](-1, -1)) "faces[1](-1, 1), faces[3](-1, 1) and faces[6](-1, -1) need to match at (-1, -1, 1) corner" + @assert (faces[1](-1, 1)≈ + faces[3](-1, 1)≈ + faces[6](-1, -1)) "faces[1](-1, 1), faces[3](-1, 1) and faces[6](-1, -1) need to match at (-1, -1, 1) corner" - @assert (faces[2](-1, 1) ≈ - faces[3]( 1, 1) ≈ - faces[6]( 1, -1)) "faces[2](-1, 1), faces[3](1, 1) and faces[6](1, -1) need to match at (1, -1, 1) corner" + @assert (faces[2](-1, 1)≈ + faces[3](1, 1)≈ + faces[6](1, -1)) "faces[2](-1, 1), faces[3](1, 1) and faces[6](1, -1) need to match at (1, -1, 1) corner" - @assert (faces[1]( 1, 1) ≈ - faces[4](-1, 1) ≈ - faces[6](-1, 1)) "faces[1](1, 1), faces[4](-1, 1) and faces[6](-1, 1) need to match at (-1, 1, 1) corner" + @assert (faces[1](1, 1)≈ + faces[4](-1, 1)≈ + faces[6](-1, 1)) "faces[1](1, 1), faces[4](-1, 1) and faces[6](-1, 1) need to match at (-1, 1, 1) corner" - @assert (faces[2]( 1, 1) ≈ - faces[4]( 1, 1) ≈ - faces[6]( 1, 1)) "faces[2](1, 1), faces[4](1, 1) and faces[6](1, 1) need to match at (1, 1, 1) corner" + @assert (faces[2](1, 1)≈ + faces[4](1, 1)≈ + faces[6](1, 1)) "faces[2](1, 1), faces[4](1, 1) and faces[6](1, 1) need to match at (1, 1, 1) corner" end - # Check if mesh is periodic isperiodic(mesh::StructuredMesh) = all(mesh.periodicity) isperiodic(mesh::StructuredMesh, dimension) = mesh.periodicity[dimension] @@ -314,28 +326,26 @@ Base.size(mesh::StructuredMesh, i) = mesh.cells_per_dimension[i] Base.axes(mesh::StructuredMesh) = map(Base.OneTo, mesh.cells_per_dimension) Base.axes(mesh::StructuredMesh, i) = Base.OneTo(mesh.cells_per_dimension[i]) - function Base.show(io::IO, mesh::StructuredMesh) - print(io, "StructuredMesh{", ndims(mesh), ", ", real(mesh), "}") + print(io, "StructuredMesh{", ndims(mesh), ", ", real(mesh), "}") end - function Base.show(io::IO, ::MIME"text/plain", mesh::StructuredMesh) - if get(io, :compact, false) - show(io, mesh) - else - summary_header(io, "StructuredMesh{" * string(ndims(mesh)) * ", " * string(real(mesh)) * "}") - summary_line(io, "size", size(mesh)) - - summary_line(io, "mapping", "") - # Print code lines of mapping_as_string - mapping_lines = split(mesh.mapping_as_string, ";") - for i in eachindex(mapping_lines) - summary_line(increment_indent(io), "line $i", strip(mapping_lines[i])) + if get(io, :compact, false) + show(io, mesh) + else + summary_header(io, + "StructuredMesh{" * string(ndims(mesh)) * ", " * + string(real(mesh)) * "}") + summary_line(io, "size", size(mesh)) + + summary_line(io, "mapping", "") + # Print code lines of mapping_as_string + mapping_lines = split(mesh.mapping_as_string, ";") + for i in 
eachindex(mapping_lines) + summary_line(increment_indent(io), "line $i", strip(mapping_lines[i])) + end + summary_footer(io) end - summary_footer(io) - end end - - end # @muladd diff --git a/src/meshes/surface_interpolant.jl b/src/meshes/surface_interpolant.jl index ec38ee3f905..22d14e38c5c 100644 --- a/src/meshes/surface_interpolant.jl +++ b/src/meshes/surface_interpolant.jl @@ -3,125 +3,126 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # CurvedSurface{RealT<:Real} # # Contains the data needed to represent a curve with data points (x,y) as a Lagrange polynomial # interpolant written in barycentric form at a given set of nodes. -struct CurvedSurface{RealT<:Real} - nodes ::Vector{RealT} - barycentric_weights ::Vector{RealT} - coordinates ::Array{RealT, 2} #[nnodes, ndims] +struct CurvedSurface{RealT <: Real} - nodes :: Vector{RealT} + barycentric_weights :: Vector{RealT} + coordinates :: Array{RealT, 2} #[nnodes, ndims] end - # evaluate the Gamma curve interpolant at a particular point s and return the (x,y) coordinate function evaluate_at(s, boundary_curve::CurvedSurface) - - @unpack nodes, barycentric_weights, coordinates = boundary_curve - - x_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes, view(coordinates, :, 1), - barycentric_weights) - y_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes, view(coordinates, :, 2), - barycentric_weights) - - return x_coordinate_at_s_on_boundary_curve, y_coordinate_at_s_on_boundary_curve + @unpack nodes, barycentric_weights, coordinates = boundary_curve + + x_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes, + view(coordinates, :, + 1), + barycentric_weights) + y_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes, + view(coordinates, :, + 2), + barycentric_weights) + + return x_coordinate_at_s_on_boundary_curve, y_coordinate_at_s_on_boundary_curve end - # evaluate the derivative of a Gamma curve interpolant at a particular point s # and return the (x,y) coordinate function derivative_at(s, boundary_curve::CurvedSurface) - - @unpack nodes, barycentric_weights, coordinates = boundary_curve - - x_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s, nodes, - view(coordinates, :, 1), - barycentric_weights) - y_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s, nodes, - view(coordinates, :, 2), - barycentric_weights) - return x_coordinate_at_s_on_boundary_curve_prime, y_coordinate_at_s_on_boundary_curve_prime + @unpack nodes, barycentric_weights, coordinates = boundary_curve + + x_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s, + nodes, + view(coordinates, + :, + 1), + barycentric_weights) + y_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s, + nodes, + view(coordinates, + :, + 2), + barycentric_weights) + return x_coordinate_at_s_on_boundary_curve_prime, + y_coordinate_at_s_on_boundary_curve_prime end - # Chebyshev-Gauss-Lobatto nodes and weights for use with curved boundaries function chebyshev_gauss_lobatto_nodes_weights(n_nodes::Integer) - # Initialize output - nodes = zeros(n_nodes) - weights = zeros(n_nodes) + # Initialize output + nodes = zeros(n_nodes) + weights = zeros(n_nodes) - # Get polynomial degree for convenience - N = n_nodes - 1 + # Get polynomial degree for convenience + N = n_nodes - 1 - for j in 1:n_nodes - nodes[j] = -cospi( (j-1) / N ) - weights[j] = pi / N
- end - weights[1] = 0.5 * weights[1] - weights[end] = 0.5 * weights[end] + for j in 1:n_nodes + nodes[j] = -cospi((j - 1) / N) + weights[j] = pi / N + end + weights[1] = 0.5 * weights[1] + weights[end] = 0.5 * weights[end] - return nodes, weights + return nodes, weights end - # Calculate Lagrange interpolating polynomial of a function f(x) at a given point x for a given # node distribution. function lagrange_interpolation(x, nodes, fvals, wbary) -# Barycentric two formulation of Lagrange interpolant - numerator = zero(eltype(fvals)) - denominator = zero(eltype(fvals)) + # Barycentric two formulation of Lagrange interpolant + numerator = zero(eltype(fvals)) + denominator = zero(eltype(fvals)) - for j in eachindex(nodes) - if isapprox(x, nodes[j], rtol=eps(x)) - return fvals[j] + for j in eachindex(nodes) + if isapprox(x, nodes[j], rtol = eps(x)) + return fvals[j] + end + t = wbary[j] / (x - nodes[j]) + numerator += t * fvals[j] + denominator += t end - t = wbary[j] / ( x - nodes[j] ) - numerator += t * fvals[j] - denominator += t - end - return numerator/denominator + return numerator / denominator end - # Calculate derivative of a Lagrange interpolating polynomial of a function f(x) at a given # point x for a given node distribution. function lagrange_interpolation_derivative(x, nodes, fvals, wbary) + at_node = false + numerator = zero(eltype(fvals)) + i = 0 - at_node = false - numerator = zero(eltype(fvals)) - i = 0 - - for j in eachindex(nodes) - if isapprox(x, nodes[j]) - at_node = true - p = fvals[j] - denominator = -wbary[j] - i = j - end - end - - if at_node for j in eachindex(nodes) - if j != i - numerator += wbary[j] * ( p - fvals[j] ) / ( x - nodes[j] ) - end + if isapprox(x, nodes[j]) + at_node = true + p = fvals[j] + denominator = -wbary[j] + i = j + end end - else - denominator = zero(eltype(fvals)) - p = lagrange_interpolation(x, nodes, fvals, wbary) - for j in eachindex(nodes) - t = wbary[j] / (x - nodes[j]) - numerator += t * ( p - fvals[j] ) / ( x - nodes[j] ) - denominator += t + + if at_node + for j in eachindex(nodes) + if j != i + numerator += wbary[j] * (p - fvals[j]) / (x - nodes[j]) + end + end + else + denominator = zero(eltype(fvals)) + p = lagrange_interpolation(x, nodes, fvals, wbary) + for j in eachindex(nodes) + t = wbary[j] / (x - nodes[j]) + numerator += t * (p - fvals[j]) / (x - nodes[j]) + denominator += t + end end - end - return numerator/denominator # p_prime + return numerator / denominator # p_prime end - - end # @muladd diff --git a/src/meshes/transfinite_mappings_3d.jl b/src/meshes/transfinite_mappings_3d.jl index 36ca3f95551..59a02f33e1a 100644 --- a/src/meshes/transfinite_mappings_3d.jl +++ b/src/meshes/transfinite_mappings_3d.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # Illustration of the corner (circled), edge (braces), and face index numbering convention # used in these functions. 
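Since the two interpolation routines above are the workhorses for every curved boundary, a short self-contained sketch may help before the hexahedral mappings below. The `barycentric_weights` helper is an assumption for illustration only (Trixi.jl stores precomputed weights in `CurvedSurface` rather than recomputing them), and the node choice matches `chebyshev_gauss_lobatto_nodes_weights(3)`.

# Naive O(n^2) barycentric weights, w_j = 1 / prod_{k != j} (x_j - x_k);
# any common scaling cancels in the barycentric formula below.
function barycentric_weights(nodes)
    return [1 / prod(nodes[j] - nodes[k] for k in eachindex(nodes) if k != j)
            for j in eachindex(nodes)]
end

nodes = [-1.0, 0.0, 1.0]      # Chebyshev-Gauss-Lobatto nodes for n_nodes = 3
fvals = nodes .^ 2            # samples of f(x) = x^2
wbary = barycentric_weights(nodes)

# Second barycentric form, as in lagrange_interpolation above:
# p(x) = sum_j (w_j / (x - x_j)) f_j / sum_j (w_j / (x - x_j)).
# x is assumed not to coincide with a node here; the real routine
# returns fvals[j] directly in that case.
x = 0.5
t = wbary ./ (x .- nodes)
p = sum(t .* fvals) / sum(t)  # == 0.25, the interpolant reproduces x^2 exactly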
@@ -46,38 +47,38 @@ # │╱ │╱ └─────> x # ①───────────────────────{1}─────────────────────────② - # Transfinite mapping formula from a point (xi, eta, zeta) in reference space [-1,1]^3 to a # physical coordinate (x, y, z) for a hexahedral element with straight sides function straight_side_hex_map(xi, eta, zeta, corner_points) - - coordinate = zeros(eltype(xi), 3) - for j in 1:3 - coordinate[j] += (0.125 * ( corner_points[j, 1] * (1 - xi) * (1 - eta) * (1 - zeta) - + corner_points[j, 2] * (1 + xi) * (1 - eta) * (1 - zeta) - + corner_points[j, 3] * (1 + xi) * (1 + eta) * (1 - zeta) - + corner_points[j, 4] * (1 - xi) * (1 + eta) * (1 - zeta) - + corner_points[j, 5] * (1 - xi) * (1 - eta) * (1 + zeta) - + corner_points[j, 6] * (1 + xi) * (1 - eta) * (1 + zeta) - + corner_points[j, 7] * (1 + xi) * (1 + eta) * (1 + zeta) - + corner_points[j, 8] * (1 - xi) * (1 + eta) * (1 + zeta) ) ) - end - - return coordinate + coordinate = zeros(eltype(xi), 3) + for j in 1:3 + coordinate[j] += (0.125 * + (corner_points[j, 1] * (1 - xi) * (1 - eta) * (1 - zeta) + + corner_points[j, 2] * (1 + xi) * (1 - eta) * (1 - zeta) + + corner_points[j, 3] * (1 + xi) * (1 + eta) * (1 - zeta) + + corner_points[j, 4] * (1 - xi) * (1 + eta) * (1 - zeta) + + corner_points[j, 5] * (1 - xi) * (1 - eta) * (1 + zeta) + + corner_points[j, 6] * (1 + xi) * (1 - eta) * (1 + zeta) + + corner_points[j, 7] * (1 + xi) * (1 + eta) * (1 + zeta) + + corner_points[j, 8] * (1 - xi) * (1 + eta) * (1 + zeta))) + end + + return coordinate end - # Construct the (x, y, z) node coordinates in the volume of a straight sided hexahedral element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, nodes, corners) - - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i, j, k, element] .= straight_side_hex_map(nodes[i], nodes[j], nodes[k], corners) - end - - return node_coordinates +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, + nodes, corners) + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, k, element] .= straight_side_hex_map(nodes[i], + nodes[j], + nodes[k], + corners) + end + + return node_coordinates end - # Transfinite mapping formula from a point (xi, eta, zeta) in reference space [-1,1]^3 to a point # (x,y,z) in physical coordinate space for a hexahedral element with general curved sides # See Section 4.3 @@ -86,87 +87,88 @@ end # transmission of waves from moving material interfaces # [PhD thesis, Florida State University](https://diginole.lib.fsu.edu/islandora/object/fsu%3A185342) function transfinite_hex_map(xi, eta, zeta, face_curves::AbstractVector{<:CurvedFace}) - - coordinate = zeros(eltype(xi), 3) - face_values = zeros(eltype(xi), (3, 6)) - edge_values = zeros(eltype(xi), (3, 12)) - corners = zeros(eltype(xi), (3, 8)) - - # Compute values along the face edges - edge_values[:, 1] .= evaluate_at(SVector(xi, -1), face_curves[1]) - edge_values[:, 2] .= evaluate_at(SVector( 1, zeta), face_curves[1]) - edge_values[:, 3] .= evaluate_at(SVector(xi, 1), face_curves[1]) - edge_values[:, 4] .= evaluate_at(SVector(-1, zeta), face_curves[1]) - - edge_values[:, 5] .= evaluate_at(SVector(xi, -1), face_curves[2]) - edge_values[:, 6] .= evaluate_at(SVector( 1, zeta), face_curves[2]) - edge_values[:, 7] .= evaluate_at(SVector(xi, 1), face_curves[2]) - edge_values[:, 8] .= evaluate_at(SVector(-1, zeta), face_curves[2]) - - edge_values[:, 9] .= evaluate_at(SVector(eta, -1), face_curves[6]) - 
edge_values[:, 10] .= evaluate_at(SVector(eta, -1), face_curves[4]) - edge_values[:, 11] .= evaluate_at(SVector(eta, 1), face_curves[4]) - edge_values[:, 12] .= evaluate_at(SVector(eta, 1), face_curves[6]) - - # Compute values on the face - face_values[:, 1] .= evaluate_at(SVector( xi, zeta), face_curves[1]) - face_values[:, 2] .= evaluate_at(SVector( xi, zeta), face_curves[2]) - face_values[:, 3] .= evaluate_at(SVector( xi, eta), face_curves[3]) - face_values[:, 4] .= evaluate_at(SVector(eta, zeta), face_curves[4]) - face_values[:, 5] .= evaluate_at(SVector( xi, eta), face_curves[5]) - face_values[:, 6] .= evaluate_at(SVector(eta, zeta), face_curves[6]) - - # Pull the eight corner values and compute the straight sided hex mapping - corners[:,1] .= face_curves[1].coordinates[:, 1, 1] - corners[:,2] .= face_curves[1].coordinates[:, end, 1] - corners[:,3] .= face_curves[2].coordinates[:, end, 1] - corners[:,4] .= face_curves[2].coordinates[:, 1, 1] - corners[:,5] .= face_curves[1].coordinates[:, 1, end] - corners[:,6] .= face_curves[1].coordinates[:, end, end] - corners[:,7] .= face_curves[2].coordinates[:, end, end] - corners[:,8] .= face_curves[2].coordinates[:, 1, end] - - coordinate_straight = straight_side_hex_map(xi, eta, zeta, corners) - - # Compute the transfinite mapping - for j in 1:3 - # Linear interpolation between opposite faces - coordinate[j] = ( 0.5 * ( face_values[j, 6] * (1 - xi ) + face_values[j, 4] * (1 + xi ) - + face_values[j, 1] * (1 - eta ) + face_values[j, 2] * (1 + eta ) - + face_values[j, 3] * (1 - zeta) + face_values[j, 5] * (1 + zeta) ) ) - - # Edge corrections to ensure faces match - coordinate[j] -= ( 0.25 * ( edge_values[j, 1 ] * (1 - eta) * (1 - zeta) - + edge_values[j, 2 ] * (1 + xi ) * (1 - eta ) - + edge_values[j, 3 ] * (1 - eta) * (1 + zeta) - + edge_values[j, 4 ] * (1 - xi ) * (1 - eta ) - + edge_values[j, 5 ] * (1 + eta) * (1 - zeta) - + edge_values[j, 6 ] * (1 + xi ) * (1 + eta ) - + edge_values[j, 7 ] * (1 + eta) * (1 + zeta) - + edge_values[j, 8 ] * (1 - xi ) * (1 + eta ) - + edge_values[j, 9 ] * (1 - xi ) * (1 - zeta) - + edge_values[j, 10] * (1 + xi ) * (1 - zeta) - + edge_values[j, 11] * (1 + xi ) * (1 + zeta) - + edge_values[j, 12] * (1 - xi ) * (1 + zeta) ) ) - - # Subtracted interior twice, so add back the straight-sided hexahedral mapping - coordinate[j] += coordinate_straight[j] - end - - return coordinate + coordinate = zeros(eltype(xi), 3) + face_values = zeros(eltype(xi), (3, 6)) + edge_values = zeros(eltype(xi), (3, 12)) + corners = zeros(eltype(xi), (3, 8)) + + # Compute values along the face edges + edge_values[:, 1] .= evaluate_at(SVector(xi, -1), face_curves[1]) + edge_values[:, 2] .= evaluate_at(SVector(1, zeta), face_curves[1]) + edge_values[:, 3] .= evaluate_at(SVector(xi, 1), face_curves[1]) + edge_values[:, 4] .= evaluate_at(SVector(-1, zeta), face_curves[1]) + + edge_values[:, 5] .= evaluate_at(SVector(xi, -1), face_curves[2]) + edge_values[:, 6] .= evaluate_at(SVector(1, zeta), face_curves[2]) + edge_values[:, 7] .= evaluate_at(SVector(xi, 1), face_curves[2]) + edge_values[:, 8] .= evaluate_at(SVector(-1, zeta), face_curves[2]) + + edge_values[:, 9] .= evaluate_at(SVector(eta, -1), face_curves[6]) + edge_values[:, 10] .= evaluate_at(SVector(eta, -1), face_curves[4]) + edge_values[:, 11] .= evaluate_at(SVector(eta, 1), face_curves[4]) + edge_values[:, 12] .= evaluate_at(SVector(eta, 1), face_curves[6]) + + # Compute values on the face + face_values[:, 1] .= evaluate_at(SVector(xi, zeta), face_curves[1]) + face_values[:, 2] .= 
evaluate_at(SVector(xi, zeta), face_curves[2]) + face_values[:, 3] .= evaluate_at(SVector(xi, eta), face_curves[3]) + face_values[:, 4] .= evaluate_at(SVector(eta, zeta), face_curves[4]) + face_values[:, 5] .= evaluate_at(SVector(xi, eta), face_curves[5]) + face_values[:, 6] .= evaluate_at(SVector(eta, zeta), face_curves[6]) + + # Pull the eight corner values and compute the straight sided hex mapping + corners[:, 1] .= face_curves[1].coordinates[:, 1, 1] + corners[:, 2] .= face_curves[1].coordinates[:, end, 1] + corners[:, 3] .= face_curves[2].coordinates[:, end, 1] + corners[:, 4] .= face_curves[2].coordinates[:, 1, 1] + corners[:, 5] .= face_curves[1].coordinates[:, 1, end] + corners[:, 6] .= face_curves[1].coordinates[:, end, end] + corners[:, 7] .= face_curves[2].coordinates[:, end, end] + corners[:, 8] .= face_curves[2].coordinates[:, 1, end] + + coordinate_straight = straight_side_hex_map(xi, eta, zeta, corners) + + # Compute the transfinite mapping + for j in 1:3 + # Linear interpolation between opposite faces + coordinate[j] = (0.5 * + (face_values[j, 6] * (1 - xi) + face_values[j, 4] * (1 + xi) + + face_values[j, 1] * (1 - eta) + + face_values[j, 2] * (1 + eta) + + face_values[j, 3] * (1 - zeta) + + face_values[j, 5] * (1 + zeta))) + + # Edge corrections to ensure faces match + coordinate[j] -= (0.25 * (edge_values[j, 1] * (1 - eta) * (1 - zeta) + + edge_values[j, 2] * (1 + xi) * (1 - eta) + + edge_values[j, 3] * (1 - eta) * (1 + zeta) + + edge_values[j, 4] * (1 - xi) * (1 - eta) + + edge_values[j, 5] * (1 + eta) * (1 - zeta) + + edge_values[j, 6] * (1 + xi) * (1 + eta) + + edge_values[j, 7] * (1 + eta) * (1 + zeta) + + edge_values[j, 8] * (1 - xi) * (1 + eta) + + edge_values[j, 9] * (1 - xi) * (1 - zeta) + + edge_values[j, 10] * (1 + xi) * (1 - zeta) + + edge_values[j, 11] * (1 + xi) * (1 + zeta) + + edge_values[j, 12] * (1 - xi) * (1 + zeta))) + + # Subtracted interior twice, so add back the straight-sided hexahedral mapping + coordinate[j] += coordinate_straight[j] + end + + return coordinate end - # Construct the (x, y, z) node coordinates in the volume of a curved sided hexahedral element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, nodes, +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, + nodes, face_curves::AbstractVector{<:CurvedFace}) + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, k, element] .= transfinite_hex_map(nodes[i], nodes[j], + nodes[k], + face_curves) + end - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i, j, k, element] .= transfinite_hex_map(nodes[i], nodes[j], nodes[k], face_curves) - end - - return node_coordinates + return node_coordinates end - - end # @muladd diff --git a/src/meshes/tree_mesh.jl b/src/meshes/tree_mesh.jl index 37ab3879e3e..34794ded852 100644 --- a/src/meshes/tree_mesh.jl +++ b/src/meshes/tree_mesh.jl @@ -3,13 +3,12 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent include("abstract_tree.jl") include("serial_tree.jl") include("parallel_tree.jl") - get_name(mesh::AbstractMesh) = mesh |> typeof |> nameof |> string # Composite type to hold the actual tree in addition to other mesh-related data @@ -25,49 +24,55 @@ get_name(mesh::AbstractMesh) = mesh |> typeof |> nameof |> string A Cartesian mesh based on trees of hypercubes to support adaptive mesh refinement. 
""" -mutable struct TreeMesh{NDIMS, TreeType<:AbstractTree{NDIMS}} <: AbstractMesh{NDIMS} - tree::TreeType - current_filename::String - unsaved_changes::Bool - first_cell_by_rank::OffsetVector{Int, Vector{Int}} - n_cells_by_rank::OffsetVector{Int, Vector{Int}} - - function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, TreeType<:AbstractTree{NDIMS}} - # Create mesh - m = new() - m.tree = TreeType(n_cells_max) - m.current_filename = "" - m.unsaved_changes = true - m.first_cell_by_rank = OffsetVector(Int[], 0) - m.n_cells_by_rank = OffsetVector(Int[], 0) - - return m - end - - # TODO: Taal refactor, order of important arguments, use of n_cells_max? - # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 - # TODO: Taal refactor, use NTuple instead of domain_center::AbstractArray{Float64} - function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, - domain_length, periodicity=true) where {NDIMS, TreeType<:AbstractTree{NDIMS}} - @assert NDIMS isa Integer && NDIMS > 0 +mutable struct TreeMesh{NDIMS, TreeType <: AbstractTree{NDIMS}} <: AbstractMesh{NDIMS} + tree::TreeType + current_filename::String + unsaved_changes::Bool + first_cell_by_rank::OffsetVector{Int, Vector{Int}} + n_cells_by_rank::OffsetVector{Int, Vector{Int}} + + function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, + TreeType <: + AbstractTree{NDIMS}} + # Create mesh + m = new() + m.tree = TreeType(n_cells_max) + m.current_filename = "" + m.unsaved_changes = true + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) + + return m + end - # Create mesh - m = new() - m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) - m.current_filename = "" - m.unsaved_changes = true - m.first_cell_by_rank = OffsetVector(Int[], 0) - m.n_cells_by_rank = OffsetVector(Int[], 0) - - return m - end + # TODO: Taal refactor, order of important arguments, use of n_cells_max? + # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 + # TODO: Taal refactor, use NTuple instead of domain_center::AbstractArray{Float64} + function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, + domain_center::AbstractArray{Float64}, + domain_length, + periodicity = true) where {NDIMS, + TreeType <: + AbstractTree{NDIMS}} + @assert NDIMS isa Integer && NDIMS > 0 + + # Create mesh + m = new() + m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) + m.current_filename = "" + m.unsaved_changes = true + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) + + return m + end end const TreeMesh1D = TreeMesh{1, TreeType} where {TreeType <: AbstractTree{1}} const TreeMesh2D = TreeMesh{2, TreeType} where {TreeType <: AbstractTree{2}} const TreeMesh3D = TreeMesh{3, TreeType} where {TreeType <: AbstractTree{3}} -const SerialTreeMesh{NDIMS} = TreeMesh{NDIMS, <:SerialTree{NDIMS}} +const SerialTreeMesh{NDIMS} = TreeMesh{NDIMS, <:SerialTree{NDIMS}} const ParallelTreeMesh{NDIMS} = TreeMesh{NDIMS, <:ParallelTree{NDIMS}} @inline mpi_parallel(mesh::SerialTreeMesh) = False() @@ -75,140 +80,152 @@ const ParallelTreeMesh{NDIMS} = TreeMesh{NDIMS, <:ParallelTree{NDIMS}} partition!(mesh::SerialTreeMesh) = nothing - # Constructor for passing the dimension and mesh type as an argument -TreeMesh(::Type{TreeType}, args...) where {NDIMS, TreeType<:AbstractTree{NDIMS}} = TreeMesh{NDIMS, TreeType}(args...) +function TreeMesh(::Type{TreeType}, + args...) 
where {NDIMS, TreeType <: AbstractTree{NDIMS}} + TreeMesh{NDIMS, TreeType}(args...) +end # Constructor accepting a single number as center (as opposed to an array) for 1D -function TreeMesh{1, TreeType}(n::Int, center::Real, len::Real, periodicity=true) where {TreeType<:AbstractTree{1}} - # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 - return TreeMesh{1, TreeType}(n, SVector{1,Float64}(center), len, periodicity) +function TreeMesh{1, TreeType}(n::Int, center::Real, len::Real, + periodicity = true) where {TreeType <: AbstractTree{1}} + # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 + return TreeMesh{1, TreeType}(n, SVector{1, Float64}(center), len, periodicity) end -function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, domain_center::NTuple{NDIMS,Real}, domain_length::Real, periodicity=true) where {NDIMS, TreeType<:AbstractTree{NDIMS}} - # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 - TreeMesh{NDIMS, TreeType}(n_cells_max, SVector{NDIMS,Float64}(domain_center), convert(Float64, domain_length), periodicity) +function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, + domain_center::NTuple{NDIMS, Real}, + domain_length::Real, + periodicity = true) where {NDIMS, + TreeType <: + AbstractTree{NDIMS}} + # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 + TreeMesh{NDIMS, TreeType}(n_cells_max, SVector{NDIMS, Float64}(domain_center), + convert(Float64, domain_length), periodicity) end -function TreeMesh(coordinates_min::NTuple{NDIMS,Real}, coordinates_max::NTuple{NDIMS,Real}; +function TreeMesh(coordinates_min::NTuple{NDIMS, Real}, + coordinates_max::NTuple{NDIMS, Real}; n_cells_max, - periodicity=true, + periodicity = true, initial_refinement_level, - refinement_patches=(), - coarsening_patches=(), - ) where {NDIMS} - # check arguments - if !(n_cells_max isa Integer && n_cells_max > 0) - throw(ArgumentError("`n_cells_max` must be a positive integer (provided `n_cells_max = $n_cells_max`)")) - end - if !(initial_refinement_level isa Integer && initial_refinement_level >= 0) - throw(ArgumentError("`initial_refinement_level` must be a non-negative integer (provided `initial_refinement_level = $initial_refinement_level`)")) - end - - # Domain length is calculated as the maximum length in any axis direction - domain_center = @. (coordinates_min + coordinates_max) / 2 - domain_length = maximum(coordinates_max .- coordinates_min) - - # TODO: MPI, create nice interface for a parallel tree/mesh - if mpi_isparallel() - TreeType = ParallelTree{NDIMS} - else - TreeType = SerialTree{NDIMS} - end - - # Create mesh - mesh = @trixi_timeit timer() "creation" TreeMesh{NDIMS, TreeType}(n_cells_max, domain_center, domain_length, periodicity) - - # Initialize mesh - initialize!(mesh, initial_refinement_level, refinement_patches, coarsening_patches) - - return mesh + refinement_patches = (), + coarsening_patches = ()) where {NDIMS} + # check arguments + if !(n_cells_max isa Integer && n_cells_max > 0) + throw(ArgumentError("`n_cells_max` must be a positive integer (provided `n_cells_max = $n_cells_max`)")) + end + if !(initial_refinement_level isa Integer && initial_refinement_level >= 0) + throw(ArgumentError("`initial_refinement_level` must be a non-negative integer (provided `initial_refinement_level = $initial_refinement_level`)")) + end + + # Domain length is calculated as the maximum length in any axis direction + domain_center = @. 
(coordinates_min + coordinates_max) / 2 + domain_length = maximum(coordinates_max .- coordinates_min) + + # TODO: MPI, create nice interface for a parallel tree/mesh + if mpi_isparallel() + if mpi_isroot() && NDIMS == 3 + println(stderr, + "ERROR: TreeMesh3D does not support parallel execution with MPI") + MPI.Abort(mpi_comm(), 1) + end + TreeType = ParallelTree{NDIMS} + else + TreeType = SerialTree{NDIMS} + end + + # Create mesh + mesh = @trixi_timeit timer() "creation" TreeMesh{NDIMS, TreeType}(n_cells_max, + domain_center, + domain_length, + periodicity) + + # Initialize mesh + initialize!(mesh, initial_refinement_level, refinement_patches, coarsening_patches) + + return mesh end function initialize!(mesh::TreeMesh, initial_refinement_level, refinement_patches, coarsening_patches) - # Create initial refinement - @trixi_timeit timer() "initial refinement" refine_uniformly!(mesh.tree, initial_refinement_level) - - # Apply refinement patches - @trixi_timeit timer() "refinement patches" for patch in refinement_patches - # TODO: Taal refactor, use multiple dispatch? - if patch.type == "box" - refine_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) - elseif patch.type == "sphere" - refine_sphere!(mesh.tree, patch.center, patch.radius) - else - error("unknown refinement patch type '$(patch.type)'") + # Create initial refinement + @trixi_timeit timer() "initial refinement" refine_uniformly!(mesh.tree, + initial_refinement_level) + + # Apply refinement patches + @trixi_timeit timer() "refinement patches" for patch in refinement_patches + # TODO: Taal refactor, use multiple dispatch? + if patch.type == "box" + refine_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) + elseif patch.type == "sphere" + refine_sphere!(mesh.tree, patch.center, patch.radius) + else + error("unknown refinement patch type '$(patch.type)'") + end end - end - # Apply coarsening patches - @trixi_timeit timer() "coarsening patches" for patch in coarsening_patches - # TODO: Taal refactor, use multiple dispatch - if patch.type == "box" - coarsen_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) - else - error("unknown coarsening patch type '$(patch.type)'") + # Apply coarsening patches + @trixi_timeit timer() "coarsening patches" for patch in coarsening_patches + # TODO: Taal refactor, use multiple dispatch + if patch.type == "box" + coarsen_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) + else + error("unknown coarsening patch type '$(patch.type)'") + end end - end - # Partition the mesh among multiple MPI ranks (does nothing if run in serial) - partition!(mesh) + # Partition the mesh among multiple MPI ranks (does nothing if run in serial) + partition!(mesh) - return nothing + return nothing end function TreeMesh(coordinates_min::Real, coordinates_max::Real; kwargs...) - TreeMesh((coordinates_min,), (coordinates_max,); kwargs...) + TreeMesh((coordinates_min,), (coordinates_max,); kwargs...) 
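end

# A minimal usage sketch of the keyword-based `TreeMesh` constructor above;
# the domain bounds and refinement level are hypothetical example values,
# not part of this patch.
coordinates_min = (-1.0, -1.0)
coordinates_max = (1.0, 1.0)
mesh = TreeMesh(coordinates_min, coordinates_max;
                initial_refinement_level = 4,
                n_cells_max = 10_000)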
- function Base.show(io::IO, mesh::TreeMesh{NDIMS, TreeType}) where {NDIMS, TreeType} - print(io, "TreeMesh{", NDIMS, ", ", TreeType, "} with length ", mesh.tree.length) + print(io, "TreeMesh{", NDIMS, ", ", TreeType, "} with length ", mesh.tree.length) end -function Base.show(io::IO, ::MIME"text/plain", mesh::TreeMesh{NDIMS, TreeType}) where {NDIMS, TreeType} - if get(io, :compact, false) - show(io, mesh) - else - setup = [ - "center" => mesh.tree.center_level_0, - "length" => mesh.tree.length_level_0, - "periodicity" => mesh.tree.periodicity, - "current #cells" => mesh.tree.length, - "maximum #cells" => mesh.tree.capacity, - ] - summary_box(io, "TreeMesh{" * string(NDIMS) * ", " * string(TreeType) * "}", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + mesh::TreeMesh{NDIMS, TreeType}) where {NDIMS, TreeType} + if get(io, :compact, false) + show(io, mesh) + else + setup = [ + "center" => mesh.tree.center_level_0, + "length" => mesh.tree.length_level_0, + "periodicity" => mesh.tree.periodicity, + "current #cells" => mesh.tree.length, + "maximum #cells" => mesh.tree.capacity, + ] + summary_box(io, "TreeMesh{" * string(NDIMS) * ", " * string(TreeType) * "}", + setup) + end end - @inline Base.ndims(mesh::TreeMesh) = ndims(mesh.tree) - - # Obtain the mesh filename from a restart file function get_restart_mesh_filename(restart_filename, mpi_parallel::False) - # Get directory name - dirname, _ = splitdir(restart_filename) + # Get directory name + dirname, _ = splitdir(restart_filename) - # Read mesh filename from restart file - mesh_file = "" - h5open(restart_filename, "r") do file - mesh_file = read(attributes(file)["mesh_file"]) - end + # Read mesh filename from restart file + mesh_file = "" + h5open(restart_filename, "r") do file + mesh_file = read(attributes(file)["mesh_file"]) + end - # Construct and return filename - return joinpath(dirname, mesh_file) + # Construct and return filename + return joinpath(dirname, mesh_file) end - function total_volume(mesh::TreeMesh) - return mesh.tree.length_level_0^ndims(mesh) + return mesh.tree.length_level_0^ndims(mesh) end - include("parallel_tree_mesh.jl") - - end # @muladd diff --git a/src/meshes/unstructured_mesh.jl b/src/meshes/unstructured_mesh.jl index 202abe8079b..c370c0f25f8 100644 --- a/src/meshes/unstructured_mesh.jl +++ b/src/meshes/unstructured_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ UnstructuredMesh2D <: AbstractMesh{2} An unstructured (possibly curved) quadrilateral mesh. All mesh information, neighbour coupling, and boundary curve information is read in from a mesh file `filename`.
""" -mutable struct UnstructuredMesh2D{RealT<:Real, CurvedSurfaceT<:CurvedSurface{RealT}} <: AbstractMesh{2} - filename ::String - n_corners ::Int - n_surfaces ::Int # total number of surfaces - n_interfaces ::Int # number of interior surfaces - n_boundaries ::Int # number of surfaces on the physical boundary - n_elements ::Int - polydeg ::Int - corners ::Array{RealT, 2} # [ndims, n_corners] - neighbour_information::Array{Int, 2} # [neighbour node/element/edge ids, n_surfaces] - boundary_names ::Array{Symbol, 2} # [local sides, n_elements] - periodicity ::Bool - element_node_ids ::Array{Int, 2} # [node ids, n_elements] - element_is_curved ::Vector{Bool} - surface_curves ::Array{CurvedSurfaceT, 2} # [local sides, n_elements] - current_filename ::String - unsaved_changes ::Bool # if true, the mesh will be saved for plotting +mutable struct UnstructuredMesh2D{RealT <: Real, CurvedSurfaceT <: CurvedSurface{RealT} + } <: AbstractMesh{2} + filename :: String + n_corners :: Int + n_surfaces :: Int # total number of surfaces + n_interfaces :: Int # number of interior surfaces + n_boundaries :: Int # number of surfaces on the physical boundary + n_elements :: Int + polydeg :: Int + corners :: Array{RealT, 2} # [ndims, n_corners] + neighbour_information :: Array{Int, 2} # [neighbour node/element/edge ids, n_surfaces] + boundary_names :: Array{Symbol, 2} # [local sides, n_elements] + periodicity :: Bool + element_node_ids :: Array{Int, 2} # [node ids, n_elements] + element_is_curved :: Vector{Bool} + surface_curves :: Array{CurvedSurfaceT, 2} # [local sides, n_elements] + current_filename :: String + unsaved_changes :: Bool # if true, the mesh will be saved for plotting end - # constructor for an unstructured mesh read in from a file # TODO: this mesh file parsing and construction of the mesh skeleton can likely be improved in terms # of performance -function UnstructuredMesh2D(filename; RealT=Float64, periodicity=false, unsaved_changes=true) - - # readin all the information from the mesh file into a string array - file_lines = readlines(open(filename)) - - # readin the number of nodes, number of interfaces, number of elements and local polynomial degree - current_line = split(file_lines[2]) - n_corners = parse(Int, current_line[1]) - n_surfaces = parse(Int, current_line[2]) - n_elements = parse(Int, current_line[3]) - mesh_polydeg = parse(Int, current_line[4]) - - mesh_nnodes = mesh_polydeg + 1 - - # The types of structs used in the following depend on information read from - # the mesh file. Thus, this cannot be type stable at all. Hence, we allocate - # the memory now and introduce a function barrier before continuing to read - # data from the file. 
- corner_nodes = Array{RealT}(undef, (2, n_corners)) - interface_info = Array{Int}(undef, (6, n_surfaces)) - element_node_ids = Array{Int}(undef, (4, n_elements)) - curved_check = Vector{Int}(undef, 4) - quad_corners = Array{RealT}(undef, (4, 2)) - quad_corners_flipped = Array{RealT}(undef, (4, 2)) - curve_values = Array{RealT}(undef, (mesh_nnodes, 2)) - element_is_curved = Array{Bool}(undef, n_elements) - CurvedSurfaceT = CurvedSurface{RealT} - surface_curves = Array{CurvedSurfaceT}(undef, (4, n_elements)) - boundary_names = Array{Symbol}(undef, (4, n_elements)) - - # create the Chebyshev-Gauss-Lobatto nodes used to represent any curved boundaries that are - # required to construct the sides - cheby_nodes_, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes) - bary_weights_ = barycentric_weights(cheby_nodes_) - cheby_nodes = SVector{mesh_nnodes}(cheby_nodes_) - bary_weights = SVector{mesh_nnodes}(bary_weights_) - - arrays = (; corner_nodes, interface_info, element_node_ids, curved_check, +function UnstructuredMesh2D(filename; RealT = Float64, periodicity = false, + unsaved_changes = true) + + # read in all the information from the mesh file into a string array + file_lines = readlines(open(filename)) + + # read in the number of nodes, number of interfaces, number of elements and local polynomial degree + current_line = split(file_lines[2]) + n_corners = parse(Int, current_line[1]) + n_surfaces = parse(Int, current_line[2]) + n_elements = parse(Int, current_line[3]) + mesh_polydeg = parse(Int, current_line[4]) + + mesh_nnodes = mesh_polydeg + 1 + + # The types of structs used in the following depend on information read from + # the mesh file. Thus, this cannot be type stable at all. Hence, we allocate + # the memory now and introduce a function barrier before continuing to read + # data from the file.
+ corner_nodes = Array{RealT}(undef, (2, n_corners)) + interface_info = Array{Int}(undef, (6, n_surfaces)) + element_node_ids = Array{Int}(undef, (4, n_elements)) + curved_check = Vector{Int}(undef, 4) + quad_corners = Array{RealT}(undef, (4, 2)) + quad_corners_flipped = Array{RealT}(undef, (4, 2)) + curve_values = Array{RealT}(undef, (mesh_nnodes, 2)) + element_is_curved = Array{Bool}(undef, n_elements) + CurvedSurfaceT = CurvedSurface{RealT} + surface_curves = Array{CurvedSurfaceT}(undef, (4, n_elements)) + boundary_names = Array{Symbol}(undef, (4, n_elements)) + + # create the Chebyshev-Gauss-Lobatto nodes used to represent any curved boundaries that are + # required to construct the sides + cheby_nodes_, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes) + bary_weights_ = barycentric_weights(cheby_nodes_) + cheby_nodes = SVector{mesh_nnodes}(cheby_nodes_) + bary_weights = SVector{mesh_nnodes}(bary_weights_) + + arrays = (; corner_nodes, interface_info, element_node_ids, curved_check, quad_corners, quad_corners_flipped, curve_values, element_is_curved, surface_curves, boundary_names) - counters = (; n_corners, n_surfaces, n_elements) + counters = (; n_corners, n_surfaces, n_elements) - n_boundaries = parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters, cheby_nodes, bary_weights) + n_boundaries = parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters, + cheby_nodes, bary_weights) - # get the number of internal interfaces in the mesh - if periodicity - n_interfaces = n_surfaces - n_boundaries = 0 - else - n_interfaces = n_surfaces - n_boundaries - end - - return UnstructuredMesh2D{RealT, CurvedSurfaceT}( - filename, n_corners, n_surfaces, n_interfaces, n_boundaries, - n_elements, mesh_polydeg, corner_nodes, - interface_info, boundary_names, periodicity, - element_node_ids, element_is_curved, surface_curves, "", unsaved_changes) + # get the number of internal interfaces in the mesh + if periodicity + n_interfaces = n_surfaces + n_boundaries = 0 + else + n_interfaces = n_surfaces - n_boundaries + end + + return UnstructuredMesh2D{RealT, CurvedSurfaceT}(filename, n_corners, n_surfaces, + n_interfaces, n_boundaries, + n_elements, mesh_polydeg, + corner_nodes, + interface_info, boundary_names, + periodicity, + element_node_ids, + element_is_curved, surface_curves, + "", unsaved_changes) end -function parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters, cheby_nodes, bary_weights) - @unpack ( corner_nodes, interface_info, element_node_ids, curved_check, - quad_corners, quad_corners_flipped, curve_values, - element_is_curved, surface_curves, boundary_names ) = arrays - @unpack n_corners, n_surfaces, n_elements = counters - mesh_nnodes = length(cheby_nodes) - - # counter to step through the mesh file line by line - file_idx = 3 - - # readin an store the nodes that dictate the corners of the elements needed to construct the - # element geometry terms - for j in 1:n_corners - current_line = split(file_lines[file_idx]) - corner_nodes[1, j] = parse(RealT, current_line[1]) - corner_nodes[2, j] = parse(RealT, current_line[2]) - file_idx += 1 - end - - # readin an store the nodes that dictate the interfaces, neighbour data, and orientations contains - # the following: - # interface_info[1] = start node ID - # interface_info[2] = end node ID - # interface_info[3] = ID of the primary element - # interface_info[4] = ID of the secondary element (if 0 then it is a physical boundary) - # interface_info[5] = local side ID on the primary element - # 
interface_info[6] = local side ID on the secondary element - # container to for the interface neighbour information and connectivity - n_boundaries = 0 - for j in 1:n_surfaces - current_line = split(file_lines[file_idx]) - interface_info[1, j] = parse(Int, current_line[1]) - interface_info[2, j] = parse(Int, current_line[2]) - interface_info[3, j] = parse(Int, current_line[3]) - interface_info[4, j] = parse(Int, current_line[4]) - interface_info[5, j] = parse(Int, current_line[5]) - interface_info[6, j] = parse(Int, current_line[6]) - - # count the number of physical boundaries - if interface_info[4,j] == 0 - n_boundaries += 1 +function parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters, + cheby_nodes, bary_weights) + @unpack (corner_nodes, interface_info, element_node_ids, curved_check, + quad_corners, quad_corners_flipped, curve_values, + element_is_curved, surface_curves, boundary_names) = arrays + @unpack n_corners, n_surfaces, n_elements = counters + mesh_nnodes = length(cheby_nodes) + + # counter to step through the mesh file line by line + file_idx = 3 + + # read in and store the nodes that dictate the corners of the elements needed to construct the + # element geometry terms + for j in 1:n_corners + current_line = split(file_lines[file_idx]) + corner_nodes[1, j] = parse(RealT, current_line[1]) + corner_nodes[2, j] = parse(RealT, current_line[2]) + file_idx += 1 end - file_idx += 1 - end - - # work arrays to pull to correct corners of a given element (agnostic to curvature) and local - # copies of the curved boundary information - - # readin an store the curved boundary information of the elements - - for j in 1:n_elements - # pull the corner node IDs - current_line = split(file_lines[file_idx]) - element_node_ids[1, j] = parse(Int, current_line[1]) - element_node_ids[2, j] = parse(Int, current_line[2]) - element_node_ids[3, j] = parse(Int, current_line[3]) - element_node_ids[4, j] = parse(Int, current_line[4]) - for i in 1:4 - # pull the (x,y) values of these corners out of the nodes array - quad_corners[i, :] .= corner_nodes[:, element_node_ids[i, j]] + + # read in and store the nodes that dictate the interfaces, neighbour data, and orientations; it contains + # the following: + # interface_info[1] = start node ID + # interface_info[2] = end node ID + # interface_info[3] = ID of the primary element + # interface_info[4] = ID of the secondary element (if 0 then it is a physical boundary) + # interface_info[5] = local side ID on the primary element + # interface_info[6] = local side ID on the secondary element + # container for the interface neighbour information and connectivity + n_boundaries = 0 + for j in 1:n_surfaces + current_line = split(file_lines[file_idx]) + interface_info[1, j] = parse(Int, current_line[1]) + interface_info[2, j] = parse(Int, current_line[2]) + interface_info[3, j] = parse(Int, current_line[3]) + interface_info[4, j] = parse(Int, current_line[4]) + interface_info[5, j] = parse(Int, current_line[5]) + interface_info[6, j] = parse(Int, current_line[6]) + + # count the number of physical boundaries + if interface_info[4, j] == 0 + n_boundaries += 1 + end + file_idx += 1 end - # pull the information to check if boundary is curved in order to read in additional data - file_idx += 1 - current_line = split(file_lines[file_idx]) - curved_check[1] = parse(Int, current_line[1]) - curved_check[2] = parse(Int, current_line[2]) - curved_check[3] = parse(Int, current_line[3]) - curved_check[4] = parse(Int, current_line[4]) - if sum(curved_check) == 0 - #
quadrilateral element is straight sided - element_is_curved[j] = false - file_idx += 1 - # read all the boundary names - boundary_names[:, j] = map(Symbol, split(file_lines[file_idx])) - else - # quadrilateral element has at least one curved side - element_is_curved[j] = true - - # flip node ordering to make sure the element is right-handed for the interpolations - m1 = 1 - m2 = 2 - @views quad_corners_flipped[1, :] .= quad_corners[4, :] - @views quad_corners_flipped[2, :] .= quad_corners[2, :] - @views quad_corners_flipped[3, :] .= quad_corners[3, :] - @views quad_corners_flipped[4, :] .= quad_corners[1, :] - for i in 1:4 - if curved_check[i] == 0 - # when curved_check[i] is 0 then the "curve" from corner `i` to corner `i+1` is a - # straight line. So we must construct the interpolant for this line - for k in 1:mesh_nnodes - curve_values[k, 1] = linear_interpolate(cheby_nodes[k], quad_corners_flipped[m1, 1], quad_corners_flipped[m2, 1]) - curve_values[k, 2] = linear_interpolate(cheby_nodes[k], quad_corners_flipped[m1, 2], quad_corners_flipped[m2, 2]) - end - else - # when curved_check[i] is 1 this curved boundary information is supplied by the mesh - # generator. So we just read it into a work array - for k in 1:mesh_nnodes - file_idx += 1 - current_line = split(file_lines[file_idx]) - curve_values[k, 1] = parse(RealT,current_line[1]) - curve_values[k, 2] = parse(RealT,current_line[2]) - end + + # work arrays to pull the correct corners of a given element (agnostic to curvature) and local + # copies of the curved boundary information + + # read in and store the curved boundary information of the elements + + for j in 1:n_elements + # pull the corner node IDs + current_line = split(file_lines[file_idx]) + element_node_ids[1, j] = parse(Int, current_line[1]) + element_node_ids[2, j] = parse(Int, current_line[2]) + element_node_ids[3, j] = parse(Int, current_line[3]) + element_node_ids[4, j] = parse(Int, current_line[4]) + for i in 1:4 + # pull the (x,y) values of these corners out of the nodes array + quad_corners[i, :] .= corner_nodes[:, element_node_ids[i, j]] end + # pull the information to check if boundary is curved in order to read in additional data + file_idx += 1 + current_line = split(file_lines[file_idx]) + curved_check[1] = parse(Int, current_line[1]) + curved_check[2] = parse(Int, current_line[2]) + curved_check[3] = parse(Int, current_line[3]) + curved_check[4] = parse(Int, current_line[4]) + if sum(curved_check) == 0 + # quadrilateral element is straight sided + element_is_curved[j] = false + file_idx += 1 + # read all the boundary names + boundary_names[:, j] = map(Symbol, split(file_lines[file_idx])) else + # quadrilateral element has at least one curved side + element_is_curved[j] = true + + # flip node ordering to make sure the element is right-handed for the interpolations + m1 = 1 + m2 = 2 + @views quad_corners_flipped[1, :] .= quad_corners[4, :] + @views quad_corners_flipped[2, :] .= quad_corners[2, :] + @views quad_corners_flipped[3, :] .= quad_corners[3, :] + @views quad_corners_flipped[4, :] .= quad_corners[1, :] + for i in 1:4 + if curved_check[i] == 0 + # when curved_check[i] is 0 then the "curve" from corner
`i+1` is a + # straight line. So we must construct the interpolant for this line + for k in 1:mesh_nnodes + curve_values[k, 1] = linear_interpolate(cheby_nodes[k], + quad_corners_flipped[m1, + 1], + quad_corners_flipped[m2, + 1]) + curve_values[k, 2] = linear_interpolate(cheby_nodes[k], + quad_corners_flipped[m1, + 2], + quad_corners_flipped[m2, + 2]) + end + else + # when curved_check[i] is 1 this curved boundary information is supplied by the mesh + # generator. So we just read it into a work array + for k in 1:mesh_nnodes + file_idx += 1 + current_line = split(file_lines[file_idx]) + curve_values[k, 1] = parse(RealT, current_line[1]) + curve_values[k, 2] = parse(RealT, current_line[2]) + end + end + # construct the curve interpolant for the current side + surface_curves[i, j] = CurvedSurfaceT(cheby_nodes, bary_weights, + copy(curve_values)) + # indexing update that contains a "flip" to ensure correct element orientation + # if we need to construct the straight line "curves" when curved_check[i] == 0 + m1 += 1 + if i == 3 + m2 = 1 + else + m2 += 1 + end + end + # finally read in the boundary names where "---" means an internal connection + file_idx += 1 + boundary_names[:, j] = map(Symbol, split(file_lines[file_idx])) end - # one last increment to the global index to read the next piece of element information - file_idx += 1 - end - return n_boundaries + # one last increment to the global index to read the next piece of element information + file_idx += 1 end - return n_boundaries end @inline Base.ndims(::UnstructuredMesh2D) = 2 @@ -230,24 +246,27 @@ isperiodic(mesh::UnstructuredMesh2D) = mesh.periodicity Base.length(mesh::UnstructuredMesh2D) = mesh.n_elements - -function Base.show(io::IO, ::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, CurvedSurfaceT} - print(io, "UnstructuredMesh2D{2, ", RealT, ", ", CurvedSurfaceT, "}") +function Base.show(io::IO, + ::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, + CurvedSurfaceT} + print(io, "UnstructuredMesh2D{2, ", RealT, ", ", CurvedSurfaceT, "}") end - -function Base.show(io::IO, ::MIME"text/plain", mesh::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, CurvedSurfaceT} - if get(io, :compact, false) - show(io, mesh) - else - summary_header(io, "UnstructuredMesh2D{" * string(2) * ", " * string(RealT) * ", " * string(CurvedSurfaceT) * "}") - summary_line(io, "mesh file", mesh.filename) - summary_line(io, "number of elements", length(mesh)) - summary_line(io, "faces", mesh.n_surfaces) - summary_line(io, "mesh polynomial degree", mesh.polydeg) - summary_footer(io) - end +function Base.show(io::IO, ::MIME"text/plain", + mesh::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, + CurvedSurfaceT + } + if get(io, :compact, false) + show(io, mesh) + else + summary_header(io, + "UnstructuredMesh2D{" * string(2) * ", " * string(RealT) * ", " * + string(CurvedSurfaceT) * "}") + summary_line(io, "mesh file", mesh.filename) + summary_line(io, "number of elements", length(mesh)) + summary_line(io, "faces", mesh.n_surfaces) + summary_line(io, "mesh polynomial degree", mesh.polydeg) + summary_footer(io) + end end - - end # @muladd
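# A hypothetical read of a curved quadrilateral mesh file with the
# `UnstructuredMesh2D` constructor documented above ("mesh.inp" is a
# placeholder path, not part of this patch):
mesh = UnstructuredMesh2D("mesh.inp"; RealT = Float64, periodicity = false)

diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index ec4c33c5628..8fef66d261e 100644 --- a/src/semidiscretization/semidiscretization.jl +++ 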
b/src/semidiscretization/semidiscretization.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ ndofs(semi::AbstractSemidiscretization) @@ -11,11 +10,10 @@ Return the number of degrees of freedom associated with each scalar variable. """ @inline function ndofs(semi::AbstractSemidiscretization) - mesh, _, solver, cache = mesh_equations_solver_cache(semi) - ndofs(mesh, solver, cache) + mesh, _, solver, cache = mesh_equations_solver_cache(semi) + ndofs(mesh, solver, cache) end - """ integrate_via_indices(func, u_ode, semi::AbstractSemidiscretization, args...; normalize=true) @@ -24,11 +23,13 @@ and integrate the result using a quadrature associated with the semidiscretization If `normalize` is true, the result is divided by the total volume of the computational domain. """ -function integrate_via_indices(func::Func, u_ode, semi::AbstractSemidiscretization, args...; normalize=true) where {Func} - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) +function integrate_via_indices(func::Func, u_ode, semi::AbstractSemidiscretization, + args...; normalize = true) where {Func} + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - integrate_via_indices(func, u, mesh, equations, solver, cache, args..., normalize=normalize) + u = wrap_array(u_ode, mesh, equations, solver, cache) + integrate_via_indices(func, u, mesh, equations, solver, cache, args..., + normalize = normalize) end """ @@ -39,18 +40,18 @@ and integrate the result using a quadrature associated with the semidiscretization If `normalize` is true, the result is divided by the total volume of the computational domain. """ -function integrate(func::Func, u_ode, semi::AbstractSemidiscretization; normalize=true) where {Func} - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) +function integrate(func::Func, u_ode, semi::AbstractSemidiscretization; + normalize = true) where {Func} + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - integrate(func, u, mesh, equations, solver, cache, normalize=normalize) + u = wrap_array(u_ode, mesh, equations, solver, cache) + integrate(func, u, mesh, equations, solver, cache, normalize = normalize) end -function integrate(u, semi::AbstractSemidiscretization; normalize=true) - integrate(cons2cons, u, semi; normalize=normalize) +function integrate(u, semi::AbstractSemidiscretization; normalize = true) + integrate(cons2cons, u, semi; normalize = normalize) end - """ calc_error_norms([func=(u_node,equations)->u_node,] u_ode, t, analyzer, semi::AbstractSemidiscretization, cache_analysis) @@ -58,8 +59,10 @@ Calculate discrete L2 and L∞ error norms of `func` applied to each nodal variable. If no exact solution is available, "errors" are calculated using some reference state and can be useful for regression tests.
""" -calc_error_norms(u_ode, t, analyzer, semi::AbstractSemidiscretization, cache_analysis) = calc_error_norms(cons2cons, u_ode, t, analyzer, semi, cache_analysis) - +function calc_error_norms(u_ode, t, analyzer, semi::AbstractSemidiscretization, + cache_analysis) + calc_error_norms(cons2cons, u_ode, t, analyzer, semi, cache_analysis) +end """ semidiscretize(semi::AbstractSemidiscretization, tspan) @@ -68,15 +71,14 @@ Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). """ function semidiscretize(semi::AbstractSemidiscretization, tspan) - u0_ode = compute_coefficients(first(tspan), semi) - # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using - # mpi_isparallel() && MPI.Barrier(mpi_comm()) - # See https://github.com/trixi-framework/Trixi.jl/issues/328 - iip = true # is-inplace, i.e., we modify a vector when calling rhs! - return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) + u0_ode = compute_coefficients(first(tspan), semi) + # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using + # mpi_isparallel() && MPI.Barrier(mpi_comm()) + # See https://github.com/trixi-framework/Trixi.jl/issues/328 + iip = true # is-inplace, i.e., we modify a vector when calling rhs! + return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) end - """ semidiscretize(semi::AbstractSemidiscretization, tspan, restart_file::AbstractString) @@ -84,16 +86,16 @@ Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). The initial condition etc. is taken from the `restart_file`. """ -function semidiscretize(semi::AbstractSemidiscretization, tspan, restart_file::AbstractString) - u0_ode = load_restart_file(semi, restart_file) - # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using - # mpi_isparallel() && MPI.Barrier(mpi_comm()) - # See https://github.com/trixi-framework/Trixi.jl/issues/328 - iip = true # is-inplace, i.e., we modify a vector when calling rhs! - return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) +function semidiscretize(semi::AbstractSemidiscretization, tspan, + restart_file::AbstractString) + u0_ode = load_restart_file(semi, restart_file) + # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using + # mpi_isparallel() && MPI.Barrier(mpi_comm()) + # See https://github.com/trixi-framework/Trixi.jl/issues/328 + iip = true # is-inplace, i.e., we modify a vector when calling rhs! + return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) end - """ compute_coefficients(func, t, semi::AbstractSemidiscretization) @@ -109,10 +111,10 @@ For semidiscretizations `semi` associated with an initial condition, `func` can to use the given initial condition at time `t`. """ function compute_coefficients(func, t, semi::AbstractSemidiscretization) - u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) - # Call `compute_coefficients` defined below - compute_coefficients!(u_ode, func, t, semi) - return u_ode + u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) + # Call `compute_coefficients` defined below + compute_coefficients!(u_ode, func, t, semi) + return u_ode end """ @@ -121,12 +123,11 @@ end Same as [`compute_coefficients`](@ref) but stores the result in `u_ode`. 
""" function compute_coefficients!(u_ode, func, t, semi::AbstractSemidiscretization) - u = wrap_array(u_ode, semi) - # Call `compute_coefficients` defined by the solver - compute_coefficients!(u, func, t, mesh_equations_solver_cache(semi)...) + u = wrap_array(u_ode, semi) + # Call `compute_coefficients` defined by the solver + compute_coefficients!(u, func, t, mesh_equations_solver_cache(semi)...) end - """ linear_structure(semi::AbstractSemidiscretization; t0=zero(real(semi))) @@ -136,31 +137,30 @@ at time `t0` as an affine-linear operator given by a linear operator `A` and a vector `b`. """ function linear_structure(semi::AbstractSemidiscretization; - t0=zero(real(semi))) - # allocate memory - u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) - du_ode = similar(u_ode) - - # get the right hand side from possible source terms - u_ode .= zero(eltype(u_ode)) - rhs!(du_ode, u_ode, semi, t0) - # Create a copy of `b` used internally to extract the linear part of `semi`. - # This is necessary to get everything correct when the users updates the - # returned vector `b`. - b = -du_ode - b_tmp = copy(b) - - # wrap the linear operator - A = LinearMap(length(u_ode), ismutating=true) do dest,src - rhs!(dest, src, semi, t0) - @. dest += b_tmp - dest - end - - return A, b + t0 = zero(real(semi))) + # allocate memory + u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) + du_ode = similar(u_ode) + + # get the right hand side from possible source terms + u_ode .= zero(eltype(u_ode)) + rhs!(du_ode, u_ode, semi, t0) + # Create a copy of `b` used internally to extract the linear part of `semi`. + # This is necessary to get everything correct when the users updates the + # returned vector `b`. + b = -du_ode + b_tmp = copy(b) + + # wrap the linear operator + A = LinearMap(length(u_ode), ismutating = true) do dest, src + rhs!(dest, src, semi, t0) + @. dest += b_tmp + dest + end + + return A, b end - """ jacobian_fd(semi::AbstractSemidiscretization; t0=zero(real(semi)), @@ -171,44 +171,43 @@ and simple second order finite difference to compute the Jacobian `J` of the semidiscretization `semi` at state `u0_ode`. 
""" function jacobian_fd(semi::AbstractSemidiscretization; - t0=zero(real(semi)), - u0_ode=compute_coefficients(t0, semi)) - # copy the initial state since it will be modified in the following - u_ode = copy(u0_ode) - du0_ode = similar(u_ode) - dup_ode = similar(u_ode) - dum_ode = similar(u_ode) + t0 = zero(real(semi)), + u0_ode = compute_coefficients(t0, semi)) + # copy the initial state since it will be modified in the following + u_ode = copy(u0_ode) + du0_ode = similar(u_ode) + dup_ode = similar(u_ode) + dum_ode = similar(u_ode) - # compute residual of linearization state - rhs!(du0_ode, u_ode, semi, t0) + # compute residual of linearization state + rhs!(du0_ode, u_ode, semi, t0) - # initialize Jacobian matrix - J = zeros(eltype(u_ode), length(u_ode), length(u_ode)) + # initialize Jacobian matrix + J = zeros(eltype(u_ode), length(u_ode), length(u_ode)) - # use second order finite difference to estimate Jacobian matrix - for idx in eachindex(u0_ode) - # determine size of fluctuation - epsilon = sqrt(eps(u0_ode[idx])) + # use second order finite difference to estimate Jacobian matrix + for idx in eachindex(u0_ode) + # determine size of fluctuation + epsilon = sqrt(eps(u0_ode[idx])) - # plus fluctuation - u_ode[idx] = u0_ode[idx] + epsilon - rhs!(dup_ode, u_ode, semi, t0) + # plus fluctuation + u_ode[idx] = u0_ode[idx] + epsilon + rhs!(dup_ode, u_ode, semi, t0) - # minus fluctuation - u_ode[idx] = u0_ode[idx] - epsilon - rhs!(dum_ode, u_ode, semi, t0) + # minus fluctuation + u_ode[idx] = u0_ode[idx] - epsilon + rhs!(dum_ode, u_ode, semi, t0) - # restore linearisation state - u_ode[idx] = u0_ode[idx] + # restore linearisation state + u_ode[idx] = u0_ode[idx] - # central second order finite difference - @. J[:, idx] = (dup_ode - dum_ode) / (2 * epsilon) - end + # central second order finite difference + @. J[:, idx] = (dup_ode - dum_ode) / (2 * epsilon) + end - return J + return J end - """ jacobian_ad_forward(semi::AbstractSemidiscretization; t0=zero(real(semi)), @@ -219,98 +218,105 @@ and forward mode automatic differentiation to compute the Jacobian `J` of the semidiscretization `semi` at state `u0_ode`. """ function jacobian_ad_forward(semi::AbstractSemidiscretization; - t0=zero(real(semi)), - u0_ode=compute_coefficients(t0, semi)) - jacobian_ad_forward(semi, t0, u0_ode) + t0 = zero(real(semi)), + u0_ode = compute_coefficients(t0, semi)) + jacobian_ad_forward(semi, t0, u0_ode) end # The following version is for plain arrays function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, u0_ode) - du_ode = similar(u0_ode) - config = ForwardDiff.JacobianConfig(nothing, du_ode, u0_ode) + du_ode = similar(u0_ode) + config = ForwardDiff.JacobianConfig(nothing, du_ode, u0_ode) - # Use a function barrier since the generation of the `config` we use above - # is not type-stable - _jacobian_ad_forward(semi, t0, u0_ode, du_ode, config) + # Use a function barrier since the generation of the `config` we use above + # is not type-stable + _jacobian_ad_forward(semi, t0, u0_ode, du_ode, config) end function _jacobian_ad_forward(semi, t0, u0_ode, du_ode, config) + new_semi = remake(semi, uEltype = eltype(config)) + J = ForwardDiff.jacobian(du_ode, u0_ode, config) do du_ode, u_ode + Trixi.rhs!(du_ode, u_ode, new_semi, t0) + end - new_semi = remake(semi, uEltype=eltype(config)) - J = ForwardDiff.jacobian(du_ode, u0_ode, config) do du_ode, u_ode - Trixi.rhs!(du_ode, u_ode, new_semi, t0) - end - - return J + return J end # This version is specialized to `StructArray`s used by some `DGMulti` solvers. 
# We need to convert the numerical solution vectors since ForwardDiff cannot # handle arrays of `SVector`s. function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, _u0_ode::StructArray) - u0_ode_plain = similar(_u0_ode, eltype(eltype(_u0_ode)), (size(_u0_ode)..., nvariables(semi))) - for (v, u_v) in enumerate(StructArrays.components(_u0_ode)) - u0_ode_plain[.., v] = u_v - end - du_ode_plain = similar(u0_ode_plain) - config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) - - # Use a function barrier since the generation of the `config` we use above - # is not type-stable - _jacobian_ad_forward_structarrays(semi, t0, u0_ode_plain, du_ode_plain, config) + u0_ode_plain = similar(_u0_ode, eltype(eltype(_u0_ode)), + (size(_u0_ode)..., nvariables(semi))) + for (v, u_v) in enumerate(StructArrays.components(_u0_ode)) + u0_ode_plain[.., v] = u_v + end + du_ode_plain = similar(u0_ode_plain) + config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) + + # Use a function barrier since the generation of the `config` we use above + # is not type-stable + _jacobian_ad_forward_structarrays(semi, t0, u0_ode_plain, du_ode_plain, config) end function _jacobian_ad_forward_structarrays(semi, t0, u0_ode_plain, du_ode_plain, config) - - new_semi = remake(semi, uEltype=eltype(config)) - J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, config) do du_ode_plain, u_ode_plain - u_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(u_ode_plain, :, :, v), nvariables(semi))) - du_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(du_ode_plain, :, :, v), nvariables(semi))) - Trixi.rhs!(du_ode, u_ode, new_semi, t0) - end - - return J + new_semi = remake(semi, uEltype = eltype(config)) + J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, + config) do du_ode_plain, u_ode_plain + u_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(u_ode_plain, + :, + :, + v), + nvariables(semi))) + du_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(du_ode_plain, + :, + :, + v), + nvariables(semi))) + Trixi.rhs!(du_ode, u_ode, new_semi, t0) + end + + return J end # This version is specialized to arrays of `StaticArray`s used by some `DGMulti` solvers. # We need to convert the numerical solution vectors since ForwardDiff cannot # handle arrays of `SVector`s. 
-function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, _u0_ode::AbstractArray{<:SVector}) - u0_ode_plain = reinterpret(eltype(eltype(_u0_ode)), _u0_ode) - du_ode_plain = similar(u0_ode_plain) - config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) - - # Use a function barrier since the generation of the `config` we use above - # is not type-stable - _jacobian_ad_forward_staticarrays(semi, t0, u0_ode_plain, du_ode_plain, config) +function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, + _u0_ode::AbstractArray{<:SVector}) + u0_ode_plain = reinterpret(eltype(eltype(_u0_ode)), _u0_ode) + du_ode_plain = similar(u0_ode_plain) + config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) + + # Use a function barrier since the generation of the `config` we use above + # is not type-stable + _jacobian_ad_forward_staticarrays(semi, t0, u0_ode_plain, du_ode_plain, config) end function _jacobian_ad_forward_staticarrays(semi, t0, u0_ode_plain, du_ode_plain, config) - - new_semi = remake(semi, uEltype=eltype(config)) - J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, config) do du_ode_plain, u_ode_plain - u_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, u_ode_plain) - du_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, du_ode_plain) - Trixi.rhs!(du_ode, u_ode, new_semi, t0) - end - - return J + new_semi = remake(semi, uEltype = eltype(config)) + J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, + config) do du_ode_plain, u_ode_plain + u_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, u_ode_plain) + du_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, du_ode_plain) + Trixi.rhs!(du_ode, u_ode, new_semi, t0) + end + + return J end - - # Sometimes, it can be useful to save some (scalar) variables associated with each element, # e.g. AMR indicators or shock indicators. Since these usually have to be re-computed # directly before IO and do not necessarily need to be stored in memory before, # get_element_variables!(element_variables, ..) # is used to retrieve such up to date element variables, modifying # `element_variables::Dict{Symbol,Any}` in place. -function get_element_variables!(element_variables, u_ode, semi::AbstractSemidiscretization) - u = wrap_array(u_ode, semi) - get_element_variables!(element_variables, u, mesh_equations_solver_cache(semi)...) +function get_element_variables!(element_variables, u_ode, + semi::AbstractSemidiscretization) + u = wrap_array(u_ode, semi) + get_element_variables!(element_variables, u, mesh_equations_solver_cache(semi)...) end - # To implement AMR and use OrdinaryDiffEq.jl etc., we have to be a bit creative. # Since the caches of the SciML ecosystem are immutable structs, we cannot simply # change the underlying arrays therein. Hence, to support changing the number of @@ -351,17 +357,15 @@ end # # Xref https://github.com/SciML/OrdinaryDiffEq.jl/pull/1275 function wrap_array(u_ode, semi::AbstractSemidiscretization) - wrap_array(u_ode, mesh_equations_solver_cache(semi)...) + wrap_array(u_ode, mesh_equations_solver_cache(semi)...) end # Like `wrap_array`, but guarantees to return a plain `Array`, which can be better # for writing solution files etc. function wrap_array_native(u_ode, semi::AbstractSemidiscretization) - wrap_array_native(u_ode, mesh_equations_solver_cache(semi)...) + wrap_array_native(u_ode, mesh_equations_solver_cache(semi)...) end - - # TODO: Taal, document interface? 
# New mesh/solver combinations have to implement # - ndofs(mesh, solver, cache) @@ -378,5 +382,4 @@ end # - rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) # - end # @muladd diff --git a/src/semidiscretization/semidiscretization_euler_acoustics.jl b/src/semidiscretization/semidiscretization_euler_acoustics.jl index c98847f7bfc..7608998c557 100644 --- a/src/semidiscretization/semidiscretization_euler_acoustics.jl +++ b/src/semidiscretization/semidiscretization_euler_acoustics.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SemidiscretizationEulerAcoustics(semi_acoustics::SemiAcoustics, semi_euler::SemiEuler; @@ -21,176 +21,193 @@ is described by a function `source_region` that maps the coordinates of a single Note that this semidiscretization should be used in conjunction with [`EulerAcousticsCouplingCallback`](@ref) and only works in two dimensions. """ -struct SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache} <: AbstractSemidiscretization - semi_acoustics::SemiAcoustics - semi_euler::SemiEuler - performance_counter::PerformanceCounter - cache::Cache - - function SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache}( - semi_acoustics, semi_euler, cache) where {SemiAcoustics, SemiEuler, Cache} - - # Currently both semidiscretizations need to use a shared mesh - @assert semi_acoustics.mesh == semi_euler.mesh - - # Check if both solvers use the same polynomial basis - @assert semi_acoustics.solver.basis == semi_euler.solver.basis - - performance_counter = PerformanceCounter() - new(semi_acoustics, semi_euler, performance_counter, cache) - end +struct SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache} <: + AbstractSemidiscretization + semi_acoustics::SemiAcoustics + semi_euler::SemiEuler + performance_counter::PerformanceCounter + cache::Cache + + function SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache}(semi_acoustics, + semi_euler, + cache) where { + SemiAcoustics, + SemiEuler, + Cache + } + + # Currently both semidiscretizations need to use a shared mesh + @assert semi_acoustics.mesh == semi_euler.mesh + + # Check if both solvers use the same polynomial basis + @assert semi_acoustics.solver.basis == semi_euler.solver.basis + + performance_counter = PerformanceCounter() + new(semi_acoustics, semi_euler, performance_counter, cache) + end end - -function SemidiscretizationEulerAcoustics(semi_acoustics::SemiAcoustics, semi_euler::SemiEuler; - source_region=x->true, weights=x->1.0) where - {Mesh, SemiAcoustics<:SemidiscretizationHyperbolic{Mesh, <:AbstractAcousticPerturbationEquations}, - SemiEuler<:SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}} - - cache = create_cache(SemidiscretizationEulerAcoustics, source_region, weights, - mesh_equations_solver_cache(semi_acoustics)...) 
- - return SemidiscretizationEulerAcoustics{typeof(semi_acoustics), typeof(semi_euler), typeof(cache)}( - semi_acoustics, semi_euler, cache) +function SemidiscretizationEulerAcoustics(semi_acoustics::SemiAcoustics, + semi_euler::SemiEuler; + source_region = x -> true, + weights = x -> 1.0) where + {Mesh, + SemiAcoustics <: + SemidiscretizationHyperbolic{Mesh, <:AbstractAcousticPerturbationEquations}, + SemiEuler <: + SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}} + cache = create_cache(SemidiscretizationEulerAcoustics, source_region, weights, + mesh_equations_solver_cache(semi_acoustics)...) + + return SemidiscretizationEulerAcoustics{typeof(semi_acoustics), typeof(semi_euler), + typeof(cache)}(semi_acoustics, semi_euler, + cache) end function create_cache(::Type{SemidiscretizationEulerAcoustics}, source_region, weights, - mesh, equations::AcousticPerturbationEquations2D, dg::DGSEM, cache) + mesh, equations::AcousticPerturbationEquations2D, dg::DGSEM, + cache) + coupled_element_ids = get_coupled_element_ids(source_region, equations, dg, cache) - coupled_element_ids = get_coupled_element_ids(source_region, equations, dg, cache) + acoustic_source_terms = zeros(eltype(cache.elements), + (ndims(equations), nnodes(dg), nnodes(dg), + length(coupled_element_ids))) - acoustic_source_terms = zeros(eltype(cache.elements), (ndims(equations), nnodes(dg), nnodes(dg), - length(coupled_element_ids))) + acoustic_source_weights = precompute_weights(source_region, weights, + coupled_element_ids, + equations, dg, cache) - acoustic_source_weights = precompute_weights(source_region, weights, coupled_element_ids, - equations, dg, cache) - - return (; acoustic_source_terms, acoustic_source_weights, coupled_element_ids) + return (; acoustic_source_terms, acoustic_source_weights, coupled_element_ids) end function get_coupled_element_ids(source_region, equations, dg::DGSEM, cache) - coupled_element_ids = Vector{Int}(undef, 0) - - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) - if source_region(x) - push!(coupled_element_ids, element) - break - end + coupled_element_ids = Vector{Int}(undef, 0) + + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, + element) + if source_region(x) + push!(coupled_element_ids, element) + break + end + end end - end - return coupled_element_ids + return coupled_element_ids end -function precompute_weights(source_region, weights, coupled_element_ids, equations, dg::DGSEM, cache) - acoustic_source_weights = zeros(eltype(cache.elements), - (nnodes(dg), nnodes(dg), length(coupled_element_ids))) - - @threaded for k in 1:length(coupled_element_ids) - element = coupled_element_ids[k] - for j in eachnode(dg), i in eachnode(dg) - x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) - acoustic_source_weights[i, j, k] = source_region(x) ? 
weights(x) : zero(weights(x)) +function precompute_weights(source_region, weights, coupled_element_ids, equations, + dg::DGSEM, cache) + acoustic_source_weights = zeros(eltype(cache.elements), + (nnodes(dg), nnodes(dg), + length(coupled_element_ids))) + + @threaded for k in 1:length(coupled_element_ids) + element = coupled_element_ids[k] + for j in eachnode(dg), i in eachnode(dg) + x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, + element) + acoustic_source_weights[i, j, k] = source_region(x) ? weights(x) : + zero(weights(x)) + end end - return acoustic_source_weights + return acoustic_source_weights end - function Base.show(io::IO, semi::SemidiscretizationEulerAcoustics) - @nospecialize semi # reduce precompilation time - - print(io, "SemidiscretizationEulerAcoustics(") - print(io, semi.semi_acoustics) - print(io, ", ", semi.semi_euler) - print(io, ", cache(") - for (idx, key) in enumerate(keys(semi.cache)) - idx > 1 && print(io, " ") - print(io, key) - end - print(io, "))") + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationEulerAcoustics(") + print(io, semi.semi_acoustics) + print(io, ", ", semi.semi_euler) + print(io, ", cache(") + for (idx, key) in enumerate(keys(semi.cache)) + idx > 1 && print(io, " ") + print(io, key) + end + print(io, "))") end -function Base.show(io::IO, mime::MIME"text/plain", semi::SemidiscretizationEulerAcoustics) - @nospecialize semi # reduce precompilation time - - if get(io, :compact, false) - show(io, semi) - else - summary_header(io, "SemidiscretizationEulerAcoustics") - summary_line(io, "semidiscretization Euler", semi.semi_euler |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_euler) - summary_line(io, "semidiscretization acoustics", semi.semi_acoustics |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_acoustics) - summary_footer(io) - end +function Base.show(io::IO, mime::MIME"text/plain", + semi::SemidiscretizationEulerAcoustics) + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationEulerAcoustics") + summary_line(io, "semidiscretization Euler", + semi.semi_euler |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_euler) + summary_line(io, "semidiscretization acoustics", + semi.semi_acoustics |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_acoustics) + summary_footer(io) + end end -
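# A hypothetical coupling setup for the `SemidiscretizationEulerAcoustics`
# constructor documented above: restrict the acoustic source terms to a disk
# of radius 0.5 around the origin with unit weights (`semi_acoustics` and
# `semi_euler` are placeholder semidiscretizations, not part of this patch).
source_region(x) = sum(abs2, x) <= 0.5^2
weights(x) = 1.0
semi = SemidiscretizationEulerAcoustics(semi_acoustics, semi_euler;
                                        source_region = source_region,
                                        weights = weights)

# The acoustics semidiscretization is the main semidiscretization.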
@inline function mesh_equations_solver_cache(semi::SemidiscretizationEulerAcoustics) - return mesh_equations_solver_cache(semi.semi_acoustics) + return mesh_equations_solver_cache(semi.semi_acoustics) end - @inline Base.ndims(semi::SemidiscretizationEulerAcoustics) = ndims(semi.semi_acoustics) @inline Base.real(semi::SemidiscretizationEulerAcoustics) = real(semi.semi_acoustics) - # Computes the coefficients of the initial condition @inline function compute_coefficients(t, semi::SemidiscretizationEulerAcoustics) - compute_coefficients(t, semi.semi_acoustics) + compute_coefficients(t, semi.semi_acoustics) end @inline function compute_coefficients!(u_ode, t, semi::SemidiscretizationEulerAcoustics) - compute_coefficients!(u_ode, t, semi.semi_acoustics) + compute_coefficients!(u_ode, t, semi.semi_acoustics) end - -@inline function calc_error_norms(func, u, t, analyzer, semi::SemidiscretizationEulerAcoustics, +@inline function calc_error_norms(func, u, t, analyzer, + semi::SemidiscretizationEulerAcoustics, cache_analysis) - calc_error_norms(func, u, t, analyzer, semi.semi_acoustics, cache_analysis) + calc_error_norms(func, u, t, analyzer, semi.semi_acoustics, cache_analysis) end - function rhs!(du_ode, u_ode, semi::SemidiscretizationEulerAcoustics, t) - @unpack semi_acoustics, cache = semi - @unpack acoustic_source_terms, acoustic_source_weights, coupled_element_ids = cache + @unpack semi_acoustics, cache = semi + @unpack acoustic_source_terms, acoustic_source_weights, coupled_element_ids = cache - du_acoustics = wrap_array(du_ode, semi_acoustics) + du_acoustics = wrap_array(du_ode, semi_acoustics) - time_start = time_ns() + time_start = time_ns() - @trixi_timeit timer() "acoustics rhs!" rhs!(du_ode, u_ode, semi_acoustics, t) + @trixi_timeit timer() "acoustics rhs!" rhs!(du_ode, u_ode, semi_acoustics, t) - @trixi_timeit timer() "add acoustic source terms" add_acoustic_source_terms!( - du_acoustics, acoustic_source_terms, acoustic_source_weights, coupled_element_ids, - mesh_equations_solver_cache(semi_acoustics)...) + @trixi_timeit timer() "add acoustic source terms" begin + add_acoustic_source_terms!(du_acoustics, acoustic_source_terms, + acoustic_source_weights, coupled_element_ids, + mesh_equations_solver_cache(semi_acoustics)...) 
+ end - runtime = time_ns() - time_start - put!(semi.performance_counter, runtime) + runtime = time_ns() - time_start + put!(semi.performance_counter, runtime) - return nothing + return nothing end - function add_acoustic_source_terms!(du_acoustics, acoustic_source_terms, source_weights, - coupled_element_ids, mesh::TreeMesh{2}, equations, dg::DGSEM, + coupled_element_ids, mesh::TreeMesh{2}, equations, + dg::DGSEM, cache) - - @threaded for k in 1:length(coupled_element_ids) - element = coupled_element_ids[k] - - for j in eachnode(dg), i in eachnode(dg) - du_acoustics[1, i, j, element] += source_weights[i, j, k] * acoustic_source_terms[1, i, j, k] - du_acoustics[2, i, j, element] += source_weights[i, j, k] * acoustic_source_terms[2, i, j, k] + @threaded for k in 1:length(coupled_element_ids) + element = coupled_element_ids[k] + + for j in eachnode(dg), i in eachnode(dg) + du_acoustics[1, i, j, element] += source_weights[i, j, k] * + acoustic_source_terms[1, i, j, k] + du_acoustics[2, i, j, element] += source_weights[i, j, k] * + acoustic_source_terms[2, i, j, k] + end end - end - return nothing + return nothing end - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/semidiscretization/semidiscretization_euler_gravity.jl b/src/semidiscretization/semidiscretization_euler_gravity.jl index 1a8d7bfad9d..665f2be9bfa 100644 --- a/src/semidiscretization/semidiscretization_euler_gravity.jl +++ b/src/semidiscretization/semidiscretization_euler_gravity.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ ParametersEulerGravity(; background_density=0.0, @@ -15,54 +15,56 @@ Set up parameters for the gravitational part of a [`SemidiscretizationEulerGravity`](@ref). """ -struct ParametersEulerGravity{RealT<:Real, TimestepGravity} - background_density ::RealT # aka rho0 - gravitational_constant::RealT # aka G - cfl ::RealT - resid_tol ::RealT - n_iterations_max ::Int - timestep_gravity::TimestepGravity +struct ParametersEulerGravity{RealT <: Real, TimestepGravity} + background_density :: RealT # aka rho0 + gravitational_constant :: RealT # aka G + cfl :: RealT + resid_tol :: RealT + n_iterations_max :: Int + timestep_gravity :: TimestepGravity end -function ParametersEulerGravity(; background_density=0.0, - gravitational_constant=1.0, - cfl=1.0, - resid_tol=1.0e-4, - n_iterations_max=10^4, - timestep_gravity=timestep_gravity_erk52_3Sstar!) - background_density, gravitational_constant, cfl, resid_tol = promote(background_density, gravitational_constant, cfl, resid_tol) - ParametersEulerGravity(background_density, gravitational_constant, cfl, resid_tol, n_iterations_max, timestep_gravity) +function ParametersEulerGravity(; background_density = 0.0, + gravitational_constant = 1.0, + cfl = 1.0, + resid_tol = 1.0e-4, + n_iterations_max = 10^4, + timestep_gravity = timestep_gravity_erk52_3Sstar!) 
+ background_density, gravitational_constant, cfl, resid_tol = promote(background_density, + gravitational_constant, + cfl, resid_tol) + ParametersEulerGravity(background_density, gravitational_constant, cfl, resid_tol, + n_iterations_max, timestep_gravity) end function Base.show(io::IO, parameters::ParametersEulerGravity) - @nospecialize parameters # reduce precompilation time - - print(io, "ParametersEulerGravity(") - print(io, "background_density=", parameters.background_density) - print(io, ", gravitational_constant=", parameters.gravitational_constant) - print(io, ", cfl=", parameters.cfl) - print(io, ", n_iterations_max=", parameters.n_iterations_max) - print(io, ", timestep_gravity=", parameters.timestep_gravity) - print(io, ")") + @nospecialize parameters # reduce precompilation time + + print(io, "ParametersEulerGravity(") + print(io, "background_density=", parameters.background_density) + print(io, ", gravitational_constant=", parameters.gravitational_constant) + print(io, ", cfl=", parameters.cfl) + print(io, ", n_iterations_max=", parameters.n_iterations_max) + print(io, ", timestep_gravity=", parameters.timestep_gravity) + print(io, ")") end function Base.show(io::IO, ::MIME"text/plain", parameters::ParametersEulerGravity) - @nospecialize parameters # reduce precompilation time - - if get(io, :compact, false) - show(io, parameters) - else - setup = [ - "background density (ρ₀)" => parameters.background_density, - "gravitational constant (G)" => parameters.gravitational_constant, - "CFL (gravity)" => parameters.cfl, - "max. #iterations" => parameters.n_iterations_max, - "time integrator" => parameters.timestep_gravity, - ] - summary_box(io, "ParametersEulerGravity", setup) - end + @nospecialize parameters # reduce precompilation time + + if get(io, :compact, false) + show(io, parameters) + else + setup = [ + "background density (ρ₀)" => parameters.background_density, + "gravitational constant (G)" => parameters.gravitational_constant, + "CFL (gravity)" => parameters.cfl, + "max. #iterations" => parameters.n_iterations_max, + "time integrator" => parameters.timestep_gravity, + ] + summary_box(io, "ParametersEulerGravity", setup) + end end -
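# A hypothetical parameter set for the gravity solver configured via the
# keyword constructor above (all values are illustrative only and not part of
# this patch):
parameters = ParametersEulerGravity(background_density = 2.0,
                                    gravitational_constant = 1.0,
                                    cfl = 1.1,
                                    resid_tol = 1.0e-10,
                                    n_iterations_max = 100,
                                    timestep_gravity = timestep_gravity_erk52_3Sstar!)

""" SemidiscretizationEulerGravity @@ -75,27 +77,35 @@ the hyperbolic diffusion equations.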
[arXiv: 2008.10593](https://arXiv.org/abs/2008.10593) """ struct SemidiscretizationEulerGravity{SemiEuler, SemiGravity, - Parameters<:ParametersEulerGravity, Cache} <: AbstractSemidiscretization - semi_euler::SemiEuler - semi_gravity::SemiGravity - parameters::Parameters - performance_counter::PerformanceCounter - gravity_counter::PerformanceCounter - cache::Cache - - function SemidiscretizationEulerGravity{SemiEuler, SemiGravity, Parameters, Cache}( - semi_euler::SemiEuler, semi_gravity::SemiGravity, - parameters::Parameters, cache::Cache) where {SemiEuler, SemiGravity, - Parameters<:ParametersEulerGravity, Cache} - @assert ndims(semi_euler) == ndims(semi_gravity) - @assert typeof(semi_euler.mesh) == typeof(semi_gravity.mesh) - @assert polydeg(semi_euler.solver) == polydeg(semi_gravity.solver) - - performance_counter = PerformanceCounter() - gravity_counter = PerformanceCounter() - - new(semi_euler, semi_gravity, parameters, performance_counter, gravity_counter, cache) - end + Parameters <: ParametersEulerGravity, Cache} <: + AbstractSemidiscretization + semi_euler :: SemiEuler + semi_gravity :: SemiGravity + parameters :: Parameters + performance_counter :: PerformanceCounter + gravity_counter :: PerformanceCounter + cache :: Cache + + function SemidiscretizationEulerGravity{SemiEuler, SemiGravity, Parameters, Cache}(semi_euler::SemiEuler, + semi_gravity::SemiGravity, + parameters::Parameters, + cache::Cache) where { + SemiEuler, + SemiGravity, + Parameters <: + ParametersEulerGravity, + Cache + } + @assert ndims(semi_euler) == ndims(semi_gravity) + @assert typeof(semi_euler.mesh) == typeof(semi_gravity.mesh) + @assert polydeg(semi_euler.solver) == polydeg(semi_gravity.solver) + + performance_counter = PerformanceCounter() + gravity_counter = PerformanceCounter() + + new(semi_euler, semi_gravity, parameters, performance_counter, gravity_counter, + cache) + end end """ @@ -104,346 +114,391 @@ end Construct a semidiscretization of the compressible Euler equations with self-gravity. `parameters` should be given as [`ParametersEulerGravity`](@ref). 
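For example, a coupled setup could be assembled as in the following sketch (an editorial illustration, not part of this changeset; `mesh`, `solver`, and `initial_condition` are assumed to be defined elsewhere, and all parameter values are placeholders):

    semi_euler = SemidiscretizationHyperbolic(mesh, CompressibleEulerEquations2D(2.0),
                                              initial_condition, solver)
    semi_gravity = SemidiscretizationHyperbolic(mesh, HyperbolicDiffusionEquations2D(),
                                                initial_condition, solver)
    parameters = ParametersEulerGravity(background_density = 2.0,
                                        gravitational_constant = 1.0,
                                        cfl = 1.2)
    semi = SemidiscretizationEulerGravity(semi_euler, semi_gravity, parameters)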
""" -function SemidiscretizationEulerGravity(semi_euler::SemiEuler, semi_gravity::SemiGravity, parameters) where - {Mesh, SemiEuler<:SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}, - SemiGravity<:SemidiscretizationHyperbolic{Mesh, <:AbstractHyperbolicDiffusionEquations}} - - u_ode = compute_coefficients(zero(real(semi_gravity)), semi_gravity) - du_ode = similar(u_ode) - u_tmp1_ode = similar(u_ode) - u_tmp2_ode = similar(u_ode) - cache = (; u_ode, du_ode, u_tmp1_ode, u_tmp2_ode) - - SemidiscretizationEulerGravity{typeof(semi_euler), typeof(semi_gravity), typeof(parameters), typeof(cache)}( - semi_euler, semi_gravity, parameters, cache) +function SemidiscretizationEulerGravity(semi_euler::SemiEuler, + semi_gravity::SemiGravity, + parameters) where + {Mesh, + SemiEuler <: + SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}, + SemiGravity <: + SemidiscretizationHyperbolic{Mesh, <:AbstractHyperbolicDiffusionEquations}} + u_ode = compute_coefficients(zero(real(semi_gravity)), semi_gravity) + du_ode = similar(u_ode) + u_tmp1_ode = similar(u_ode) + u_tmp2_ode = similar(u_ode) + cache = (; u_ode, du_ode, u_tmp1_ode, u_tmp2_ode) + + SemidiscretizationEulerGravity{typeof(semi_euler), typeof(semi_gravity), + typeof(parameters), typeof(cache)}(semi_euler, + semi_gravity, + parameters, cache) end - # TODO: AD, add appropriate method for remake - function Base.show(io::IO, semi::SemidiscretizationEulerGravity) - @nospecialize semi # reduce precompilation time - - print(io, "SemidiscretizationEulerGravity using") - print(io, semi.semi_euler) - print(io, ", ", semi.semi_gravity) - print(io, ", ", semi.parameters) - print(io, ", cache(") - for (idx,key) in enumerate(keys(semi.cache)) - idx > 1 && print(io, " ") - print(io, key) - end - print(io, "))") + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationEulerGravity using") + print(io, semi.semi_euler) + print(io, ", ", semi.semi_gravity) + print(io, ", ", semi.parameters) + print(io, ", cache(") + for (idx, key) in enumerate(keys(semi.cache)) + idx > 1 && print(io, " ") + print(io, key) + end + print(io, "))") end function Base.show(io::IO, mime::MIME"text/plain", semi::SemidiscretizationEulerGravity) - @nospecialize semi # reduce precompilation time - - if get(io, :compact, false) - show(io, semi) - else - summary_header(io, "SemidiscretizationEulerGravity") - summary_line(io, "semidiscretization Euler", semi.semi_euler |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_euler) - summary_line(io, "semidiscretization gravity", semi.semi_gravity |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_gravity) - summary_line(io, "parameters", semi.parameters |> typeof |> nameof) - show(increment_indent(io), mime, semi.parameters) - summary_footer(io) - end + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationEulerGravity") + summary_line(io, "semidiscretization Euler", + semi.semi_euler |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_euler) + summary_line(io, "semidiscretization gravity", + semi.semi_gravity |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_gravity) + summary_line(io, "parameters", semi.parameters |> typeof |> nameof) + show(increment_indent(io), mime, semi.parameters) + summary_footer(io) + end end - # The compressible Euler semidiscretization is considered to be the main semidiscretization. 
# The hyperbolic diffusion equations part is only used internally to update the gravitational # potential during an rhs! evaluation of the flow solver. @inline function mesh_equations_solver_cache(semi::SemidiscretizationEulerGravity) - mesh_equations_solver_cache(semi.semi_euler) + mesh_equations_solver_cache(semi.semi_euler) end @inline Base.ndims(semi::SemidiscretizationEulerGravity) = ndims(semi.semi_euler) @inline Base.real(semi::SemidiscretizationEulerGravity) = real(semi.semi_euler) - # computes the coefficients of the initial condition @inline function compute_coefficients(t, semi::SemidiscretizationEulerGravity) - compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) - compute_coefficients(t, semi.semi_euler) + compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) + compute_coefficients(t, semi.semi_euler) end # computes the coefficients of the initial condition and stores the Euler part in `u_ode` @inline function compute_coefficients!(u_ode, t, semi::SemidiscretizationEulerGravity) - compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) - compute_coefficients!(u_ode, t, semi.semi_euler) + compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) + compute_coefficients!(u_ode, t, semi.semi_euler) end - -@inline function calc_error_norms(func, u, t, analyzer, semi::SemidiscretizationEulerGravity, cache_analysis) - calc_error_norms(func, u, t, analyzer, semi.semi_euler, cache_analysis) +@inline function calc_error_norms(func, u, t, analyzer, + semi::SemidiscretizationEulerGravity, cache_analysis) + calc_error_norms(func, u, t, analyzer, semi.semi_euler, cache_analysis) end - function rhs!(du_ode, u_ode, semi::SemidiscretizationEulerGravity, t) - @unpack semi_euler, semi_gravity, cache = semi - - u_euler = wrap_array(u_ode , semi_euler) - du_euler = wrap_array(du_ode, semi_euler) - u_gravity = wrap_array(cache.u_ode, semi_gravity) - - time_start = time_ns() - - # standard semidiscretization of the compressible Euler equations - @trixi_timeit timer() "Euler solver" rhs!(du_ode, u_ode, semi_euler, t) - - # compute gravitational potential and forces - @trixi_timeit timer() "gravity solver" update_gravity!(semi, u_ode) - - # add gravitational source source_terms to the Euler part - if ndims(semi_euler) == 1 - @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] - @views @. du_euler[3, .., :] -= u_euler[2, .., :] * u_gravity[2, .., :] - elseif ndims(semi_euler) == 2 - @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] - @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] - @views @. du_euler[4, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + - u_euler[3, .., :] * u_gravity[3, .., :]) - elseif ndims(semi_euler) == 3 - @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] - @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] - @views @. du_euler[4, .., :] -= u_euler[1, .., :] * u_gravity[4, .., :] - @views @. 
du_euler[5, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + - u_euler[3, .., :] * u_gravity[3, .., :] + - u_euler[4, .., :] * u_gravity[4, .., :]) - else - error("Number of dimensions $(ndims(semi_euler)) not supported.") - end - - runtime = time_ns() - time_start - put!(semi.performance_counter, runtime) - - return nothing -end + @unpack semi_euler, semi_gravity, cache = semi + u_euler = wrap_array(u_ode, semi_euler) + du_euler = wrap_array(du_ode, semi_euler) + u_gravity = wrap_array(cache.u_ode, semi_gravity) -# TODO: Taal refactor, add some callbacks or so within the gravity update to allow investigating/optimizing it -function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) - @unpack semi_euler, semi_gravity, parameters, gravity_counter, cache = semi - - # Can be changed by AMR - resize!(cache.du_ode, length(cache.u_ode)) - resize!(cache.u_tmp1_ode, length(cache.u_ode)) - resize!(cache.u_tmp2_ode, length(cache.u_ode)) - - u_euler = wrap_array(u_ode, semi_euler) - u_gravity = wrap_array(cache.u_ode, semi_gravity) - du_gravity = wrap_array(cache.du_ode, semi_gravity) - - # set up main loop - finalstep = false - @unpack n_iterations_max, cfl, resid_tol, timestep_gravity = parameters - iter = 0 - t = zero(real(semi_gravity.solver)) - - # iterate gravity solver until convergence or maximum number of iterations are reached - @unpack equations = semi_gravity - while !finalstep - dt = @trixi_timeit timer() "calculate dt" cfl * max_dt(u_gravity, t, semi_gravity.mesh, - have_constant_speed(equations), equations, - semi_gravity.solver, semi_gravity.cache) - - # evolve solution by one pseudo-time step time_start = time_ns() - timestep_gravity(cache, u_euler, t, dt, parameters, semi_gravity) - runtime = time_ns() - time_start - put!(gravity_counter, runtime) - # update iteration counter - iter += 1 - t += dt - - # check if we reached the maximum number of iterations - if n_iterations_max > 0 && iter >= n_iterations_max - @warn "Max iterations reached: Gravity solver failed to converge!" residual=maximum(abs, @views du_gravity[1, .., :]) t=t dt=dt - finalstep = true + # standard semidiscretization of the compressible Euler equations + @trixi_timeit timer() "Euler solver" rhs!(du_ode, u_ode, semi_euler, t) + + # compute gravitational potential and forces + @trixi_timeit timer() "gravity solver" update_gravity!(semi, u_ode) + + # add gravitational source terms to the Euler part + if ndims(semi_euler) == 1 + @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] + @views @. du_euler[3, .., :] -= u_euler[2, .., :] * u_gravity[2, .., :] + elseif ndims(semi_euler) == 2 + @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] + @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] + @views @. du_euler[4, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + + u_euler[3, .., :] * u_gravity[3, .., :]) + elseif ndims(semi_euler) == 3 + @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] + @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] + @views @. du_euler[4, .., :] -= u_euler[1, .., :] * u_gravity[4, .., :] + @views @. 
du_euler[5, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + + u_euler[3, .., :] * u_gravity[3, .., :] + + u_euler[4, .., :] * u_gravity[4, .., :]) + else + error("Number of dimensions $(ndims(semi_euler)) not supported.") end - # this is an absolute tolerance check - if maximum(abs, @views du_gravity[1, .., :]) <= resid_tol - finalstep = true - end - end + runtime = time_ns() - time_start + put!(semi.performance_counter, runtime) - return nothing + return nothing end +# TODO: Taal refactor, add some callbacks or so within the gravity update to allow investigating/optimizing it +function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) + @unpack semi_euler, semi_gravity, parameters, gravity_counter, cache = semi + + # Can be changed by AMR + resize!(cache.du_ode, length(cache.u_ode)) + resize!(cache.u_tmp1_ode, length(cache.u_ode)) + resize!(cache.u_tmp2_ode, length(cache.u_ode)) + + u_euler = wrap_array(u_ode, semi_euler) + u_gravity = wrap_array(cache.u_ode, semi_gravity) + du_gravity = wrap_array(cache.du_ode, semi_gravity) + + # set up main loop + finalstep = false + @unpack n_iterations_max, cfl, resid_tol, timestep_gravity = parameters + iter = 0 + t = zero(real(semi_gravity.solver)) + + # iterate gravity solver until convergence or maximum number of iterations are reached + @unpack equations = semi_gravity + while !finalstep + dt = @trixi_timeit timer() "calculate dt" begin + cfl * max_dt(u_gravity, t, semi_gravity.mesh, + have_constant_speed(equations), equations, + semi_gravity.solver, semi_gravity.cache) + end + + # evolve solution by one pseudo-time step + time_start = time_ns() + timestep_gravity(cache, u_euler, t, dt, parameters, semi_gravity) + runtime = time_ns() - time_start + put!(gravity_counter, runtime) + + # update iteration counter + iter += 1 + t += dt + + # check if we reached the maximum number of iterations + if n_iterations_max > 0 && iter >= n_iterations_max + @warn "Max iterations reached: Gravity solver failed to converge!" residual=maximum(abs, + @views du_gravity[1, + .., + :]) t=t dt=dt + finalstep = true + end + + # this is an absolute tolerance check + if maximum(abs, @views du_gravity[1, .., :]) <= resid_tol + finalstep = true + end + end + + return nothing +end # Integrate gravity solver for 2N-type low-storage schemes function timestep_gravity_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, a, b, c) - G = gravity_parameters.gravitational_constant - rho0 = gravity_parameters.background_density - grav_scale = -4.0*pi*G - - @unpack u_ode, du_ode, u_tmp1_ode = cache - u_tmp1_ode .= zero(eltype(u_tmp1_ode)) - du_gravity = wrap_array(du_ode, semi_gravity) - for stage in eachindex(c) - t_stage = t + dt * c[stage] - - # rhs! has the source term for the harmonic problem - # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already - # included in the `rhs!` call. - rhs!(du_ode, u_ode, semi_gravity, t_stage) - - # Source term: Jeans instability OR coupling convergence test OR blast wave - # put in gravity source term proportional to Euler density - # OBS! subtract off the background density ρ_0 (spatial mean value) - @views @. 
du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) - - a_stage = a[stage] - b_stage_dt = b[stage] * dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for idx in eachindex(u_ode) - u_tmp1_ode[idx] = du_ode[idx] - u_tmp1_ode[idx] * a_stage - u_ode[idx] += u_tmp1_ode[idx] * b_stage_dt - end + G = gravity_parameters.gravitational_constant + rho0 = gravity_parameters.background_density + grav_scale = -4.0 * pi * G + + @unpack u_ode, du_ode, u_tmp1_ode = cache + u_tmp1_ode .= zero(eltype(u_tmp1_ode)) + du_gravity = wrap_array(du_ode, semi_gravity) + for stage in eachindex(c) + t_stage = t + dt * c[stage] + + # rhs! has the source term for the harmonic problem + # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already + # included in the `rhs!` call. + rhs!(du_ode, u_ode, semi_gravity, t_stage) + + # Source term: Jeans instability OR coupling convergence test OR blast wave + # put in gravity source term proportional to Euler density + # OBS! subtract off the background density ρ_0 (spatial mean value) + @views @. du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) + + a_stage = a[stage] + b_stage_dt = b[stage] * dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for idx in eachindex(u_ode) + u_tmp1_ode[idx] = du_ode[idx] - u_tmp1_ode[idx] * a_stage + u_ode[idx] += u_tmp1_ode[idx] * b_stage_dt + end + end end - end - return nothing + return nothing end -function timestep_gravity_carpenter_kennedy_erk54_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # Coefficients for Carpenter's 5-stage 4th-order low-storage Runge-Kutta method - a = SVector(0.0, 567301805773.0 / 1357537059087.0,2404267990393.0 / 2016746695238.0, - 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) - b = SVector(1432997174477.0 / 9575080441755.0, 5161836677717.0 / 13612068292357.0, - 1720146321549.0 / 2090206949498.0, 3134564353537.0 / 4481467310338.0, - 2277821191437.0 / 14882151754819.0) - c = SVector(0.0, 1432997174477.0 / 9575080441755.0, 2526269341429.0 / 6820363962896.0, - 2006345519317.0 / 3224310063776.0, 2802321613138.0 / 2924317926251.0) - - timestep_gravity_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, a, b, c) +function timestep_gravity_carpenter_kennedy_erk54_2N!(cache, u_euler, t, dt, + gravity_parameters, semi_gravity) + # Coefficients for Carpenter's 5-stage 4th-order low-storage Runge-Kutta method + a = SVector(0.0, 567301805773.0 / 1357537059087.0, + 2404267990393.0 / 2016746695238.0, + 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) + b = SVector(1432997174477.0 / 9575080441755.0, 5161836677717.0 / 13612068292357.0, + 1720146321549.0 / 2090206949498.0, 3134564353537.0 / 4481467310338.0, + 2277821191437.0 / 14882151754819.0) + c = SVector(0.0, 1432997174477.0 / 9575080441755.0, + 2526269341429.0 / 6820363962896.0, + 2006345519317.0 / 3224310063776.0, 2802321613138.0 / 2924317926251.0) + + timestep_gravity_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, a, b, + c) end - # Integrate gravity solver for 3S*-type low-storage schemes -function timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, +function timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity, gamma1, gamma2, gamma3, beta, delta, c) - G = gravity_parameters.gravitational_constant - rho0 = gravity_parameters.background_density - grav_scale = -4 * G * pi - - @unpack u_ode, du_ode, u_tmp1_ode, u_tmp2_ode = cache - u_tmp1_ode .= 
zero(eltype(u_tmp1_ode)) - u_tmp2_ode .= u_ode - du_gravity = wrap_array(du_ode, semi_gravity) - for stage in eachindex(c) - t_stage = t + dt * c[stage] - - # rhs! has the source term for the harmonic problem - # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already - # included in the `rhs!` call. - rhs!(du_ode, u_ode, semi_gravity, t_stage) - - # Source term: Jeans instability OR coupling convergence test OR blast wave - # put in gravity source term proportional to Euler density - # OBS! subtract off the background density ρ_0 around which the Jeans instability is perturbed - @views @. du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) - - delta_stage = delta[stage] - gamma1_stage = gamma1[stage] - gamma2_stage = gamma2[stage] - gamma3_stage = gamma3[stage] - beta_stage_dt = beta[stage] * dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for idx in eachindex(u_ode) - u_tmp1_ode[idx] += delta_stage * u_ode[idx] - u_ode[idx] = (gamma1_stage * u_ode[idx] + - gamma2_stage * u_tmp1_ode[idx] + - gamma3_stage * u_tmp2_ode[idx] + - beta_stage_dt * du_ode[idx]) - end + G = gravity_parameters.gravitational_constant + rho0 = gravity_parameters.background_density + grav_scale = -4 * G * pi + + @unpack u_ode, du_ode, u_tmp1_ode, u_tmp2_ode = cache + u_tmp1_ode .= zero(eltype(u_tmp1_ode)) + u_tmp2_ode .= u_ode + du_gravity = wrap_array(du_ode, semi_gravity) + for stage in eachindex(c) + t_stage = t + dt * c[stage] + + # rhs! has the source term for the harmonic problem + # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already + # included in the `rhs!` call. + rhs!(du_ode, u_ode, semi_gravity, t_stage) + + # Source term: Jeans instability OR coupling convergence test OR blast wave + # put in gravity source term proportional to Euler density + # OBS! subtract off the background density ρ_0 around which the Jeans instability is perturbed + @views @. 
du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) + + delta_stage = delta[stage] + gamma1_stage = gamma1[stage] + gamma2_stage = gamma2[stage] + gamma3_stage = gamma3[stage] + beta_stage_dt = beta[stage] * dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for idx in eachindex(u_ode) + u_tmp1_ode[idx] += delta_stage * u_ode[idx] + u_ode[idx] = (gamma1_stage * u_ode[idx] + + gamma2_stage * u_tmp1_ode[idx] + + gamma3_stage * u_tmp2_ode[idx] + + beta_stage_dt * du_ode[idx]) + end + end end - end - return nothing + return nothing end -function timestep_gravity_erk51_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 - # and examples/parameters_hypdiff_lax_friedrichs.toml - # 5 stages, order 1 - gamma1 = SVector(0.0000000000000000E+00, 5.2910412316555866E-01, 2.8433964362349406E-01, -1.4467571130907027E+00, 7.5592215948661057E-02) - gamma2 = SVector(1.0000000000000000E+00, 2.6366970460864109E-01, 3.7423646095836322E-01, 7.8786901832431289E-01, 3.7754129043053775E-01) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 8.0043329115077388E-01, 1.3550099149374278E-01) - beta = SVector(1.9189497208340553E-01, 5.4506406707700059E-02, 1.2103893164085415E-01, 6.8582252490550921E-01, 8.7914657211972225E-01) - delta = SVector(1.0000000000000000E+00, 7.8593091509463076E-01, 1.2639038717454840E-01, 1.7726945920209813E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 1.9189497208340553E-01, 1.9580448818599061E-01, 2.4241635859769023E-01, 5.0728347557552977E-01) - - timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, - gamma1, gamma2, gamma3, beta, delta, c) +function timestep_gravity_erk51_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity) + # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 + # and examples/parameters_hypdiff_lax_friedrichs.toml + # 5 stages, order 1 + gamma1 = SVector(0.0000000000000000E+00, 5.2910412316555866E-01, + 2.8433964362349406E-01, -1.4467571130907027E+00, + 7.5592215948661057E-02) + gamma2 = SVector(1.0000000000000000E+00, 2.6366970460864109E-01, + 3.7423646095836322E-01, 7.8786901832431289E-01, + 3.7754129043053775E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 8.0043329115077388E-01, + 1.3550099149374278E-01) + beta = SVector(1.9189497208340553E-01, 5.4506406707700059E-02, + 1.2103893164085415E-01, 6.8582252490550921E-01, + 8.7914657211972225E-01) + delta = SVector(1.0000000000000000E+00, 7.8593091509463076E-01, + 1.2639038717454840E-01, 1.7726945920209813E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 1.9189497208340553E-01, 1.9580448818599061E-01, + 2.4241635859769023E-01, 5.0728347557552977E-01) + + timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, + gamma1, gamma2, gamma3, beta, delta, c) end -function timestep_gravity_erk52_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 - # and examples/parameters_hypdiff_lax_friedrichs.toml - # 5 stages, order 2 - gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, 1.0385212774098265E+00, 3.6859755007388034E-01, -6.3350615190506088E-01) - gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, -2.7595818152587825E-02, 9.1271323651988631E-02, 6.8495995159465062E-01) - gamma3 = 
SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 4.1301005663300466E-01, -5.4537881202277507E-03) - beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, 3.7561630338850771E-01, 2.9356700007428856E-02, 2.5205285143494666E-01) - delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, 2.6579275844515687E-01, 9.9687218193685878E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, 1.0221535725056414E+00, 1.4280257701954349E+00, 7.1581334196229851E-01) - - timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, - gamma1, gamma2, gamma3, beta, delta, c) +function timestep_gravity_erk52_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity) + # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 + # and examples/parameters_hypdiff_lax_friedrichs.toml + # 5 stages, order 2 + gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, + 1.0385212774098265E+00, 3.6859755007388034E-01, + -6.3350615190506088E-01) + gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, + -2.7595818152587825E-02, 9.1271323651988631E-02, + 6.8495995159465062E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 4.1301005663300466E-01, + -5.4537881202277507E-03) + beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, + 3.7561630338850771E-01, 2.9356700007428856E-02, + 2.5205285143494666E-01) + delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, + 2.6579275844515687E-01, 9.9687218193685878E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, 1.0221535725056414E+00, + 1.4280257701954349E+00, 7.1581334196229851E-01) + + timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, + gamma1, gamma2, gamma3, beta, delta, c) end -function timestep_gravity_erk53_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 - # and examples/parameters_hypdiff_lax_friedrichs.toml - # 5 stages, order 3 - gamma1 = SVector(0.0000000000000000E+00, 6.9362208054011210E-01, 9.1364483229179472E-01, 1.3129305757628569E+00, -1.4615811339132949E+00) - gamma2 = SVector(1.0000000000000000E+00, 1.3224582239681788E+00, 2.4213162353103135E-01, -3.8532017293685838E-01, 1.5603355704723714E+00) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 3.8306787039991996E-01, -3.5683121201711010E-01) - beta = SVector(8.4476964977404881E-02, 3.0834660698015803E-01, 3.2131664733089232E-01, 2.8783574345390539E-01, 8.2199204703236073E-01) - delta = SVector(1.0000000000000000E+00, -7.6832695815481578E-01, 1.2497251501714818E-01, 1.4496404749796306E+00, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 8.4476964977404881E-02, 2.8110631488732202E-01, 5.7093842145029405E-01, 7.2999896418559662E-01) - - timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, - gamma1, gamma2, gamma3, beta, delta, c) +function timestep_gravity_erk53_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity) + # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 + # and examples/parameters_hypdiff_lax_friedrichs.toml + # 5 stages, order 3 + gamma1 = SVector(0.0000000000000000E+00, 6.9362208054011210E-01, + 9.1364483229179472E-01, 1.3129305757628569E+00, + -1.4615811339132949E+00) + gamma2 = SVector(1.0000000000000000E+00, 
1.3224582239681788E+00, + 2.4213162353103135E-01, -3.8532017293685838E-01, + 1.5603355704723714E+00) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 3.8306787039991996E-01, + -3.5683121201711010E-01) + beta = SVector(8.4476964977404881E-02, 3.0834660698015803E-01, + 3.2131664733089232E-01, 2.8783574345390539E-01, + 8.2199204703236073E-01) + delta = SVector(1.0000000000000000E+00, -7.6832695815481578E-01, + 1.2497251501714818E-01, 1.4496404749796306E+00, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 8.4476964977404881E-02, 2.8110631488732202E-01, + 5.7093842145029405E-01, 7.2999896418559662E-01) + + timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, + gamma1, gamma2, gamma3, beta, delta, c) end - # TODO: Taal decide, where should specific parts like these be? @inline function save_solution_file(u_ode, t, dt, iter, - semi::SemidiscretizationEulerGravity, solution_callback, - element_variables=Dict{Symbol,Any}()) - - u_euler = wrap_array_native(u_ode, semi.semi_euler) - filename_euler = save_solution_file(u_euler, t, dt, iter, - mesh_equations_solver_cache(semi.semi_euler)..., - solution_callback, element_variables, system="euler") - - u_gravity = wrap_array_native(semi.cache.u_ode, semi.semi_gravity) - filename_gravity = save_solution_file(u_gravity, t, dt, iter, - mesh_equations_solver_cache(semi.semi_gravity)..., - solution_callback, element_variables, system="gravity") - - return filename_euler, filename_gravity + semi::SemidiscretizationEulerGravity, + solution_callback, + element_variables = Dict{Symbol, Any}()) + u_euler = wrap_array_native(u_ode, semi.semi_euler) + filename_euler = save_solution_file(u_euler, t, dt, iter, + mesh_equations_solver_cache(semi.semi_euler)..., + solution_callback, element_variables, + system = "euler") + + u_gravity = wrap_array_native(semi.cache.u_ode, semi.semi_gravity) + filename_gravity = save_solution_file(u_gravity, t, dt, iter, + mesh_equations_solver_cache(semi.semi_gravity)..., + solution_callback, element_variables, + system = "gravity") + + return filename_euler, filename_gravity end - @inline function (amr_callback::AMRCallback)(u_ode, semi::SemidiscretizationEulerGravity, t, iter; kwargs...) - passive_args = ((semi.cache.u_ode, mesh_equations_solver_cache(semi.semi_gravity)...),) - amr_callback(u_ode, mesh_equations_solver_cache(semi.semi_euler)..., semi, t, iter; - kwargs..., passive_args=passive_args) + passive_args = ((semi.cache.u_ode, + mesh_equations_solver_cache(semi.semi_gravity)...),) + amr_callback(u_ode, mesh_equations_solver_cache(semi.semi_euler)..., semi, t, iter; + kwargs..., passive_args = passive_args) end - - end # @muladd diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 7e93f2a7f64..50b2c21c14e 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SemidiscretizationHyperbolic @@ -11,33 +11,42 @@ A struct containing everything needed to describe a spatial semidiscretization of a hyperbolic conservation law. 
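It is usually assembled via the convenience constructor of the same name rather than by filling in the fields directly; a minimal sketch (an editorial illustration; the concrete `mesh`, `equations`, `initial_condition`, and `solver` objects are assumptions):

    semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver;
                                        boundary_conditions = boundary_condition_periodic,
                                        source_terms = nothing)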
""" -struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, BoundaryConditions, - SourceTerms, Solver, Cache} <: AbstractSemidiscretization - - mesh::Mesh - equations::Equations - - # This guy is a bit messy since we abuse it as some kind of "exact solution" - # although this doesn't really exist... - initial_condition::InitialCondition - - boundary_conditions::BoundaryConditions - source_terms::SourceTerms - solver::Solver - cache::Cache - performance_counter::PerformanceCounter - - function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, BoundaryConditions, SourceTerms, Solver, Cache}( - mesh::Mesh, equations::Equations, - initial_condition::InitialCondition, boundary_conditions::BoundaryConditions, - source_terms::SourceTerms, - solver::Solver, cache::Cache) where {Mesh, Equations, InitialCondition, BoundaryConditions, SourceTerms, Solver, Cache} - @assert ndims(mesh) == ndims(equations) - - performance_counter = PerformanceCounter() - - new(mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache, performance_counter) - end +struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, + BoundaryConditions, + SourceTerms, Solver, Cache} <: + AbstractSemidiscretization + mesh::Mesh + equations::Equations + + # This guy is a bit messy since we abuse it as some kind of "exact solution" + # although this doesn't really exist... + initial_condition::InitialCondition + + boundary_conditions::BoundaryConditions + source_terms::SourceTerms + solver::Solver + cache::Cache + performance_counter::PerformanceCounter + + function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, + BoundaryConditions, SourceTerms, Solver, Cache + }(mesh::Mesh, equations::Equations, + initial_condition::InitialCondition, + boundary_conditions::BoundaryConditions, + source_terms::SourceTerms, + solver::Solver, + cache::Cache) where {Mesh, Equations, + InitialCondition, + BoundaryConditions, + SourceTerms, Solver, + Cache} + @assert ndims(mesh) == ndims(equations) + + performance_counter = PerformanceCounter() + + new(mesh, equations, initial_condition, boundary_conditions, source_terms, + solver, cache, performance_counter) + end end """ @@ -51,208 +60,253 @@ end Construct a semidiscretization of a hyperbolic PDE. """ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; - source_terms=nothing, - boundary_conditions=boundary_condition_periodic, + source_terms = nothing, + boundary_conditions = boundary_condition_periodic, # `RealT` is used as real type for node locations etc. # while `uEltype` is used as element type of solutions etc. - RealT=real(solver), uEltype=RealT, - initial_cache=NamedTuple()) - - cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., initial_cache...) - _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, cache) - - SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), typeof(solver), typeof(cache)}( - mesh, equations, initial_condition, _boundary_conditions, source_terms, solver, cache) + RealT = real(solver), uEltype = RealT, + initial_cache = NamedTuple()) + cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., + initial_cache...) 
+ _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, + cache) + + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), + typeof(initial_condition), + typeof(_boundary_conditions), typeof(source_terms), + typeof(solver), typeof(cache)}(mesh, equations, + initial_condition, + _boundary_conditions, + source_terms, solver, + cache) end - # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. Instead, `SciMLBase.remake` has exactly the # semantics we want to use here. In particular, it allows us to re-use mutable parts, # e.g. `remake(semi).mesh === semi.mesh`. -function remake(semi::SemidiscretizationHyperbolic; uEltype=real(semi.solver), - mesh=semi.mesh, - equations=semi.equations, - initial_condition=semi.initial_condition, - solver=semi.solver, - source_terms=semi.source_terms, - boundary_conditions=semi.boundary_conditions - ) - # TODO: Which parts do we want to `remake`? At least the solver needs some - # special care if shock-capturing volume integrals are used (because of - # the indicators and their own caches...). - SemidiscretizationHyperbolic( - mesh, equations, initial_condition, solver; source_terms, boundary_conditions, uEltype) +function remake(semi::SemidiscretizationHyperbolic; uEltype = real(semi.solver), + mesh = semi.mesh, + equations = semi.equations, + initial_condition = semi.initial_condition, + solver = semi.solver, + source_terms = semi.source_terms, + boundary_conditions = semi.boundary_conditions) + # TODO: Which parts do we want to `remake`? At least the solver needs some + # special care if shock-capturing volume integrals are used (because of + # the indicators and their own caches...). 
+ SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; + source_terms, boundary_conditions, uEltype) end - # general fallback -digest_boundary_conditions(boundary_conditions, mesh, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions, mesh, solver, cache) + boundary_conditions +end # general fallback -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh, solver, cache) + boundary_conditions +end # resolve ambiguities with definitions below -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) + boundary_conditions +end -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) + boundary_conditions +end -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) + boundary_conditions +end # allow passing a single BC that gets converted into a tuple of BCs # on (mapped) hypercube domains function digest_boundary_conditions(boundary_conditions, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) - (; x_neg=boundary_conditions, x_pos=boundary_conditions) + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) + (; x_neg = boundary_conditions, x_pos = boundary_conditions) end function digest_boundary_conditions(boundary_conditions, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) - (; x_neg=boundary_conditions, x_pos=boundary_conditions, - y_neg=boundary_conditions, y_pos=boundary_conditions) + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) + (; x_neg = boundary_conditions, x_pos = boundary_conditions, + y_neg = boundary_conditions, y_pos = boundary_conditions) end function digest_boundary_conditions(boundary_conditions, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) - (; x_neg=boundary_conditions, x_pos=boundary_conditions, - y_neg=boundary_conditions, y_pos=boundary_conditions, - z_neg=boundary_conditions, z_pos=boundary_conditions) + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) + (; x_neg = boundary_conditions, x_pos = boundary_conditions, + y_neg = boundary_conditions, y_pos = boundary_conditions, + z_neg = boundary_conditions, z_pos = boundary_conditions) end # allow passing a tuple of BCs that gets converted into a named tuple to make it # self-documenting on (mapped) hypercube domains function digest_boundary_conditions(boundary_conditions::NTuple{2, Any}, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) - (; x_neg=boundary_conditions[1], x_pos=boundary_conditions[2]) + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) + (; x_neg = boundary_conditions[1], x_pos = boundary_conditions[2]) end function 
digest_boundary_conditions(boundary_conditions::NTuple{4, Any}, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) - (; x_neg=boundary_conditions[1], x_pos=boundary_conditions[2], - y_neg=boundary_conditions[3], y_pos=boundary_conditions[4]) + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) + (; x_neg = boundary_conditions[1], x_pos = boundary_conditions[2], + y_neg = boundary_conditions[3], y_pos = boundary_conditions[4]) end function digest_boundary_conditions(boundary_conditions::NTuple{6, Any}, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) - (; x_neg=boundary_conditions[1], x_pos=boundary_conditions[2], - y_neg=boundary_conditions[3], y_pos=boundary_conditions[4], - z_neg=boundary_conditions[5], z_pos=boundary_conditions[6]) + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) + (; x_neg = boundary_conditions[1], x_pos = boundary_conditions[2], + y_neg = boundary_conditions[3], y_pos = boundary_conditions[4], + z_neg = boundary_conditions[5], z_pos = boundary_conditions[6]) end # allow passing named tuples of BCs constructed in an arbitrary order # on (mapped) hypercube domains -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys,ValueTypes}, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) where {Keys, ValueTypes<:NTuple{2,Any}} - @unpack x_neg, x_pos = boundary_conditions - (; x_neg, x_pos) +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) where {Keys, ValueTypes <: NTuple{2, Any}} + @unpack x_neg, x_pos = boundary_conditions + (; x_neg, x_pos) end -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys,ValueTypes}, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) where {Keys, ValueTypes<:NTuple{4,Any}} - @unpack x_neg, x_pos, y_neg, y_pos = boundary_conditions - (; x_neg, x_pos, y_neg, y_pos) +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) where {Keys, ValueTypes <: NTuple{4, Any}} + @unpack x_neg, x_pos, y_neg, y_pos = boundary_conditions + (; x_neg, x_pos, y_neg, y_pos) end -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys,ValueTypes}, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) where {Keys, ValueTypes<:NTuple{6,Any}} - @unpack x_neg, x_pos, y_neg, y_pos, z_neg, z_pos = boundary_conditions - (; x_neg, x_pos, y_neg, y_pos, z_neg, z_pos) +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) where {Keys, ValueTypes <: NTuple{6, Any}} + @unpack x_neg, x_pos, y_neg, y_pos, z_neg, z_pos = boundary_conditions + (; x_neg, x_pos, y_neg, y_pos, z_neg, z_pos) end # sort the boundary conditions from a dictionary and into tuples function digest_boundary_conditions(boundary_conditions::Dict, mesh, solver, cache) - UnstructuredSortedBoundaryTypes(boundary_conditions, cache) + UnstructuredSortedBoundaryTypes(boundary_conditions, cache) end -function digest_boundary_conditions(boundary_conditions::AbstractArray, mesh, solver, cache) - throw(ArgumentError("Please use a (named) tuple instead of an (abstract) array to supply multiple boundary conditions (to improve performance).")) +function digest_boundary_conditions(boundary_conditions::AbstractArray, mesh, solver, + cache) + throw(ArgumentError("Please use a (named) tuple instead of an 
(abstract) array to supply multiple boundary conditions (to improve performance).")) end - function Base.show(io::IO, semi::SemidiscretizationHyperbolic) - @nospecialize semi # reduce precompilation time - - print(io, "SemidiscretizationHyperbolic(") - print(io, semi.mesh) - print(io, ", ", semi.equations) - print(io, ", ", semi.initial_condition) - print(io, ", ", semi.boundary_conditions) - print(io, ", ", semi.source_terms) - print(io, ", ", semi.solver) - print(io, ", cache(") - for (idx,key) in enumerate(keys(semi.cache)) - idx > 1 && print(io, " ") - print(io, key) - end - print(io, "))") + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationHyperbolic(") + print(io, semi.mesh) + print(io, ", ", semi.equations) + print(io, ", ", semi.initial_condition) + print(io, ", ", semi.boundary_conditions) + print(io, ", ", semi.source_terms) + print(io, ", ", semi.solver) + print(io, ", cache(") + for (idx, key) in enumerate(keys(semi.cache)) + idx > 1 && print(io, " ") + print(io, key) + end + print(io, "))") end function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperbolic) - @nospecialize semi # reduce precompilation time - - if get(io, :compact, false) - show(io, semi) - else - summary_header(io, "SemidiscretizationHyperbolic") - summary_line(io, "#spatial dimensions", ndims(semi.equations)) - summary_line(io, "mesh", semi.mesh) - summary_line(io, "equations", semi.equations |> typeof |> nameof) - summary_line(io, "initial condition", semi.initial_condition) - - print_boundary_conditions(io, semi) - - summary_line(io, "source terms", semi.source_terms) - summary_line(io, "solver", semi.solver |> typeof |> nameof) - summary_line(io, "total #DOFs", ndofs(semi)) - summary_footer(io) - end + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationHyperbolic") + summary_line(io, "#spatial dimensions", ndims(semi.equations)) + summary_line(io, "mesh", semi.mesh) + summary_line(io, "equations", semi.equations |> typeof |> nameof) + summary_line(io, "initial condition", semi.initial_condition) + + print_boundary_conditions(io, semi) + + summary_line(io, "source terms", semi.source_terms) + summary_line(io, "solver", semi.solver |> typeof |> nameof) + summary_line(io, "total #DOFs", ndofs(semi)) + summary_footer(io) + end end # type alias for dispatch in printing of boundary conditions +#! format: off const SemiHypMeshBCSolver{Mesh, BoundaryConditions, Solver} = - SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, BoundaryConditions, - SourceTerms, Solver} where {Equations, InitialCondition, SourceTerms} + SemidiscretizationHyperbolic{Mesh, + Equations, + InitialCondition, + BoundaryConditions, + SourceTerms, + Solver} where {Equations, + InitialCondition, + SourceTerms} +#! format: on # generic fallback: print the type of semi.boundary_condition. 
-print_boundary_conditions(io, semi::SemiHypMeshBCSolver) = summary_line(io, "boundary conditions", typeof(semi.boundary_conditions)) - -function print_boundary_conditions(io, semi::SemiHypMeshBCSolver{<:Any, <:UnstructuredSortedBoundaryTypes}) - @unpack boundary_conditions = semi - @unpack boundary_dictionary = boundary_conditions - summary_line(io, "boundary conditions", length(boundary_dictionary)) - for (boundary_name, boundary_condition) in boundary_dictionary - summary_line(increment_indent(io), boundary_name, typeof(boundary_condition)) - end +function print_boundary_conditions(io, semi::SemiHypMeshBCSolver) + summary_line(io, "boundary conditions", typeof(semi.boundary_conditions)) +end + +function print_boundary_conditions(io, + semi::SemiHypMeshBCSolver{<:Any, + <:UnstructuredSortedBoundaryTypes + }) + @unpack boundary_conditions = semi + @unpack boundary_dictionary = boundary_conditions + summary_line(io, "boundary conditions", length(boundary_dictionary)) + for (boundary_name, boundary_condition) in boundary_dictionary + summary_line(increment_indent(io), boundary_name, typeof(boundary_condition)) + end end function print_boundary_conditions(io, semi::SemiHypMeshBCSolver{<:Any, <:NamedTuple}) - @unpack boundary_conditions = semi - summary_line(io, "boundary conditions", length(boundary_conditions)) - bc_names = keys(boundary_conditions) - for (i, bc_name) in enumerate(bc_names) - summary_line(increment_indent(io), String(bc_name), typeof(boundary_conditions[i])) - end + @unpack boundary_conditions = semi + summary_line(io, "boundary conditions", length(boundary_conditions)) + bc_names = keys(boundary_conditions) + for (i, bc_name) in enumerate(bc_names) + summary_line(increment_indent(io), String(bc_name), + typeof(boundary_conditions[i])) + end end -function print_boundary_conditions(io, semi::SemiHypMeshBCSolver{<:Union{TreeMesh, StructuredMesh}, <:Union{Tuple,NamedTuple,AbstractArray}}) - summary_line(io, "boundary conditions", 2*ndims(semi)) - bcs = semi.boundary_conditions - - summary_line(increment_indent(io), "negative x", bcs[1]) - summary_line(increment_indent(io), "positive x", bcs[2]) - if ndims(semi) > 1 - summary_line(increment_indent(io), "negative y", bcs[3]) - summary_line(increment_indent(io), "positive y", bcs[4]) - end - if ndims(semi) > 2 - summary_line(increment_indent(io), "negative z", bcs[5]) - summary_line(increment_indent(io), "positive z", bcs[6]) - end +function print_boundary_conditions(io, + semi::SemiHypMeshBCSolver{ + <:Union{TreeMesh, + StructuredMesh}, + <:Union{Tuple, NamedTuple, + AbstractArray}}) + summary_line(io, "boundary conditions", 2 * ndims(semi)) + bcs = semi.boundary_conditions + + summary_line(increment_indent(io), "negative x", bcs[1]) + summary_line(increment_indent(io), "positive x", bcs[2]) + if ndims(semi) > 1 + summary_line(increment_indent(io), "negative y", bcs[3]) + summary_line(increment_indent(io), "positive y", bcs[4]) + end + if ndims(semi) > 2 + summary_line(increment_indent(io), "negative z", bcs[5]) + summary_line(increment_indent(io), "positive z", bcs[6]) + end end @inline Base.ndims(semi::SemidiscretizationHyperbolic) = ndims(semi.mesh) @@ -261,45 +315,42 @@ end @inline Base.real(semi::SemidiscretizationHyperbolic) = real(semi.solver) - @inline function mesh_equations_solver_cache(semi::SemidiscretizationHyperbolic) - @unpack mesh, equations, solver, cache = semi - return mesh, equations, solver, cache + @unpack mesh, equations, solver, cache = semi + return mesh, equations, solver, cache end +function 
calc_error_norms(func, u_ode, t, analyzer, semi::SemidiscretizationHyperbolic, + cache_analysis) + @unpack mesh, equations, initial_condition, solver, cache = semi + u = wrap_array(u_ode, mesh, equations, solver, cache) -function calc_error_norms(func, u_ode, t, analyzer, semi::SemidiscretizationHyperbolic, cache_analysis) - @unpack mesh, equations, initial_condition, solver, cache = semi - u = wrap_array(u_ode, mesh, equations, solver, cache) - - calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver, cache, cache_analysis) + calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver, + cache, cache_analysis) end - function compute_coefficients(t, semi::SemidiscretizationHyperbolic) - # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl` - compute_coefficients(semi.initial_condition, t, semi) + # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl` + compute_coefficients(semi.initial_condition, t, semi) end function compute_coefficients!(u_ode, t, semi::SemidiscretizationHyperbolic) - compute_coefficients!(u_ode, semi.initial_condition, t, semi) + compute_coefficients!(u_ode, semi.initial_condition, t, semi) end - function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) - @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi + @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi - u = wrap_array(u_ode, mesh, equations, solver, cache) - du = wrap_array(du_ode, mesh, equations, solver, cache) + u = wrap_array(u_ode, mesh, equations, solver, cache) + du = wrap_array(du_ode, mesh, equations, solver, cache) - # TODO: Taal decide, do we need to pass the mesh? - time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) - runtime = time_ns() - time_start - put!(semi.performance_counter, runtime) + # TODO: Taal decide, do we need to pass the mesh? + time_start = time_ns() + @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition, + boundary_conditions, source_terms, solver, cache) + runtime = time_ns() - time_start + put!(semi.performance_counter, runtime) - return nothing + return nothing end - - end # @muladd diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index 9d85034c805..f54bc744164 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SemidiscretizationHyperbolicParabolic @@ -11,46 +11,75 @@ A struct containing everything needed to describe a spatial semidiscretization of a mixed hyperbolic-parabolic conservation law. """ -struct SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, InitialCondition, - BoundaryConditions, BoundaryConditionsParabolic, - SourceTerms, Solver, SolverParabolic, Cache, CacheParabolic} <: AbstractSemidiscretization - - mesh::Mesh - - equations::Equations - equations_parabolic::EquationsParabolic - - # This guy is a bit messy since we abuse it as some kind of "exact solution" - # although this doesn't really exist... 
- initial_condition::InitialCondition - - boundary_conditions::BoundaryConditions - boundary_conditions_parabolic::BoundaryConditionsParabolic - - source_terms::SourceTerms - - solver::Solver - solver_parabolic::SolverParabolic - - cache::Cache - cache_parabolic::CacheParabolic - - performance_counter::PerformanceCounterList{2} - - function SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, InitialCondition, BoundaryConditions, BoundaryConditionsParabolic, SourceTerms, Solver, SolverParabolic, Cache, CacheParabolic}( - mesh::Mesh, equations::Equations, equations_parabolic::EquationsParabolic, initial_condition::InitialCondition, - boundary_conditions::BoundaryConditions, boundary_conditions_parabolic::BoundaryConditionsParabolic, - source_terms::SourceTerms, solver::Solver, solver_parabolic::SolverParabolic, cache::Cache, cache_parabolic::CacheParabolic) where {Mesh, Equations, EquationsParabolic, InitialCondition, BoundaryConditions, BoundaryConditionsParabolic, SourceTerms, Solver, SolverParabolic, Cache, CacheParabolic} - @assert ndims(mesh) == ndims(equations) - - # Todo: assert nvariables(equations)==nvariables(equations_parabolic) - - performance_counter = PerformanceCounterList{2}(false) - - new(mesh, equations, equations_parabolic, initial_condition, - boundary_conditions, boundary_conditions_parabolic, - source_terms, solver, solver_parabolic, cache, cache_parabolic, performance_counter) - end +struct SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, + InitialCondition, + BoundaryConditions, + BoundaryConditionsParabolic, + SourceTerms, Solver, SolverParabolic, + Cache, CacheParabolic} <: + AbstractSemidiscretization + mesh::Mesh + + equations::Equations + equations_parabolic::EquationsParabolic + + # This guy is a bit messy since we abuse it as some kind of "exact solution" + # although this doesn't really exist... + initial_condition::InitialCondition + + boundary_conditions::BoundaryConditions + boundary_conditions_parabolic::BoundaryConditionsParabolic + + source_terms::SourceTerms + + solver::Solver + solver_parabolic::SolverParabolic + + cache::Cache + cache_parabolic::CacheParabolic + + performance_counter::PerformanceCounterList{2} + + function SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, + InitialCondition, BoundaryConditions, + BoundaryConditionsParabolic, + SourceTerms, Solver, + SolverParabolic, Cache, + CacheParabolic + }(mesh::Mesh, + equations::Equations, + equations_parabolic::EquationsParabolic, + initial_condition::InitialCondition, + boundary_conditions::BoundaryConditions, + boundary_conditions_parabolic::BoundaryConditionsParabolic, + source_terms::SourceTerms, + solver::Solver, + solver_parabolic::SolverParabolic, + cache::Cache, + cache_parabolic::CacheParabolic) where { + Mesh, + Equations, + EquationsParabolic, + InitialCondition, + BoundaryConditions, + BoundaryConditionsParabolic, + SourceTerms, + Solver, + SolverParabolic, + Cache, + CacheParabolic + } + @assert ndims(mesh) == ndims(equations) + + # Todo: assert nvariables(equations)==nvariables(equations_parabolic) + + performance_counter = PerformanceCounterList{2}(false) + + new(mesh, equations, equations_parabolic, initial_condition, + boundary_conditions, boundary_conditions_parabolic, + source_terms, solver, solver_parabolic, cache, cache_parabolic, + performance_counter) + end end """ @@ -66,150 +95,174 @@ Construct a semidiscretization of a hyperbolic-parabolic PDE. 
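As an editorial sketch (not part of this changeset; the equation types and the diffusivity are placeholder assumptions, and `mesh`, `initial_condition`, and `solver` are assumed to be defined elsewhere), a typical advection-diffusion setup could look like:

    equations = LinearScalarAdvectionEquation2D(1.0, 1.0)
    equations_parabolic = LaplaceDiffusion2D(1.0e-2, equations)
    semi = SemidiscretizationHyperbolicParabolic(mesh,
                                                 (equations, equations_parabolic),
                                                 initial_condition, solver)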
""" function SemidiscretizationHyperbolicParabolic(mesh, equations::Tuple, initial_condition, solver; - solver_parabolic=default_parabolic_solver(), - source_terms=nothing, - boundary_conditions=(boundary_condition_periodic, boundary_condition_periodic), + solver_parabolic = default_parabolic_solver(), + source_terms = nothing, + boundary_conditions = (boundary_condition_periodic, + boundary_condition_periodic), # `RealT` is used as real type for node locations etc. # while `uEltype` is used as element type of solutions etc. - RealT=real(solver), uEltype=RealT, - initial_caches=(NamedTuple(), NamedTuple())) - - equations_hyperbolic, equations_parabolic = equations - boundary_conditions_hyperbolic, boundary_conditions_parabolic = boundary_conditions - initial_hyperbolic_cache, initial_cache_parabolic = initial_caches - - return SemidiscretizationHyperbolicParabolic(mesh, equations_hyperbolic, equations_parabolic, - initial_condition, solver; solver_parabolic, source_terms, - boundary_conditions=boundary_conditions_hyperbolic, - boundary_conditions_parabolic=boundary_conditions_parabolic, - RealT, uEltype, initial_cache=initial_hyperbolic_cache, - initial_cache_parabolic=initial_cache_parabolic) + RealT = real(solver), uEltype = RealT, + initial_caches = (NamedTuple(), + NamedTuple())) + equations_hyperbolic, equations_parabolic = equations + boundary_conditions_hyperbolic, boundary_conditions_parabolic = boundary_conditions + initial_hyperbolic_cache, initial_cache_parabolic = initial_caches + + return SemidiscretizationHyperbolicParabolic(mesh, equations_hyperbolic, + equations_parabolic, + initial_condition, solver; + solver_parabolic, source_terms, + boundary_conditions = boundary_conditions_hyperbolic, + boundary_conditions_parabolic = boundary_conditions_parabolic, + RealT, uEltype, + initial_cache = initial_hyperbolic_cache, + initial_cache_parabolic = initial_cache_parabolic) end function SemidiscretizationHyperbolicParabolic(mesh, equations, equations_parabolic, initial_condition, solver; - solver_parabolic=default_parabolic_solver(), - source_terms=nothing, - boundary_conditions=boundary_condition_periodic, - boundary_conditions_parabolic=boundary_condition_periodic, + solver_parabolic = default_parabolic_solver(), + source_terms = nothing, + boundary_conditions = boundary_condition_periodic, + boundary_conditions_parabolic = boundary_condition_periodic, # `RealT` is used as real type for node locations etc. # while `uEltype` is used as element type of solutions etc. - RealT=real(solver), uEltype=RealT, - initial_cache=NamedTuple(), - initial_cache_parabolic=NamedTuple()) - - cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., initial_cache...) - _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, cache) - _boundary_conditions_parabolic = digest_boundary_conditions(boundary_conditions_parabolic, mesh, solver, cache) - - cache_parabolic = (; create_cache_parabolic(mesh, equations, equations_parabolic, - solver, solver_parabolic, RealT, uEltype)..., - initial_cache_parabolic...) 
- - SemidiscretizationHyperbolicParabolic{typeof(mesh), typeof(equations), typeof(equations_parabolic), - typeof(initial_condition), typeof(_boundary_conditions), typeof(_boundary_conditions_parabolic), - typeof(source_terms), typeof(solver), typeof(solver_parabolic), typeof(cache), typeof(cache_parabolic)}( - mesh, equations, equations_parabolic, initial_condition, - _boundary_conditions, _boundary_conditions_parabolic, source_terms, - solver, solver_parabolic, cache, cache_parabolic) + RealT = real(solver), uEltype = RealT, + initial_cache = NamedTuple(), + initial_cache_parabolic = NamedTuple()) + cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., + initial_cache...) + _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, + cache) + _boundary_conditions_parabolic = digest_boundary_conditions(boundary_conditions_parabolic, + mesh, solver, cache) + + cache_parabolic = (; + create_cache_parabolic(mesh, equations, equations_parabolic, + solver, solver_parabolic, RealT, + uEltype)..., + initial_cache_parabolic...) + + SemidiscretizationHyperbolicParabolic{typeof(mesh), typeof(equations), + typeof(equations_parabolic), + typeof(initial_condition), + typeof(_boundary_conditions), + typeof(_boundary_conditions_parabolic), + typeof(source_terms), typeof(solver), + typeof(solver_parabolic), typeof(cache), + typeof(cache_parabolic)}(mesh, equations, + equations_parabolic, + initial_condition, + _boundary_conditions, + _boundary_conditions_parabolic, + source_terms, + solver, + solver_parabolic, + cache, + cache_parabolic) end - # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. Instead, `SciMLBase.remake` has exactly the # semantics we want to use here. In particular, it allows us to re-use mutable parts, # e.g. `remake(semi).mesh === semi.mesh`. -function remake(semi::SemidiscretizationHyperbolicParabolic; uEltype=real(semi.solver), - mesh=semi.mesh, - equations=semi.equations, - equations_parabolic=semi.equations_parabolic, - initial_condition=semi.initial_condition, - solver=semi.solver, - solver_parabolic=semi.solver_parabolic, - source_terms=semi.source_terms, - boundary_conditions=semi.boundary_conditions, - boundary_conditions_parabolic=semi.boundary_conditions_parabolic - ) - # TODO: Which parts do we want to `remake`? At least the solver needs some - # special care if shock-capturing volume integrals are used (because of - # the indicators and their own caches...). - SemidiscretizationHyperbolicParabolic( - mesh, equations, equations_parabolic, initial_condition, solver; solver_parabolic, source_terms, boundary_conditions, boundary_conditions_parabolic, uEltype) +function remake(semi::SemidiscretizationHyperbolicParabolic; + uEltype = real(semi.solver), + mesh = semi.mesh, + equations = semi.equations, + equations_parabolic = semi.equations_parabolic, + initial_condition = semi.initial_condition, + solver = semi.solver, + solver_parabolic = semi.solver_parabolic, + source_terms = semi.source_terms, + boundary_conditions = semi.boundary_conditions, + boundary_conditions_parabolic = semi.boundary_conditions_parabolic) + # TODO: Which parts do we want to `remake`? At least the solver needs some + # special care if shock-capturing volume integrals are used (because of + # the indicators and their own caches...). 
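    # A minimal usage sketch relying only on the keyword interface above;
    # `Float32` is merely an example element type:
    #
    #     semi32 = remake(semi; uEltype = Float32)
    #
    # Mutable ingredients are re-used, i.e. `remake(semi).mesh === semi.mesh`.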
+ SemidiscretizationHyperbolicParabolic(mesh, equations, equations_parabolic, + initial_condition, solver; solver_parabolic, + source_terms, boundary_conditions, + boundary_conditions_parabolic, uEltype) end function Base.show(io::IO, semi::SemidiscretizationHyperbolicParabolic) - @nospecialize semi # reduce precompilation time - - print(io, "SemidiscretizationHyperbolicParabolic(") - print(io, semi.mesh) - print(io, ", ", semi.equations) - print(io, ", ", semi.equations_parabolic) - print(io, ", ", semi.initial_condition) - print(io, ", ", semi.boundary_conditions) - print(io, ", ", semi.boundary_conditions_parabolic) - print(io, ", ", semi.source_terms) - print(io, ", ", semi.solver) - print(io, ", ", semi.solver_parabolic) - print(io, ", cache(") - for (idx,key) in enumerate(keys(semi.cache)) - idx > 1 && print(io, " ") - print(io, key) - end - print(io, "))") + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationHyperbolicParabolic(") + print(io, semi.mesh) + print(io, ", ", semi.equations) + print(io, ", ", semi.equations_parabolic) + print(io, ", ", semi.initial_condition) + print(io, ", ", semi.boundary_conditions) + print(io, ", ", semi.boundary_conditions_parabolic) + print(io, ", ", semi.source_terms) + print(io, ", ", semi.solver) + print(io, ", ", semi.solver_parabolic) + print(io, ", cache(") + for (idx, key) in enumerate(keys(semi.cache)) + idx > 1 && print(io, " ") + print(io, key) + end + print(io, "))") end -function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperbolicParabolic) - @nospecialize semi # reduce precompilation time - - if get(io, :compact, false) - show(io, semi) - else - summary_header(io, "SemidiscretizationHyperbolicParabolic") - summary_line(io, "#spatial dimensions", ndims(semi.equations)) - summary_line(io, "mesh", semi.mesh) - summary_line(io, "hyperbolic equations", semi.equations |> typeof |> nameof) - summary_line(io, "parabolic equations", semi.equations_parabolic |> typeof |> nameof) - summary_line(io, "initial condition", semi.initial_condition) - - # print_boundary_conditions(io, semi) - - summary_line(io, "source terms", semi.source_terms) - summary_line(io, "solver", semi.solver |> typeof |> nameof) - summary_line(io, "parabolic solver", semi.solver_parabolic |> typeof |> nameof) - summary_line(io, "total #DOFs", ndofs(semi)) - summary_footer(io) - end +function Base.show(io::IO, ::MIME"text/plain", + semi::SemidiscretizationHyperbolicParabolic) + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationHyperbolicParabolic") + summary_line(io, "#spatial dimensions", ndims(semi.equations)) + summary_line(io, "mesh", semi.mesh) + summary_line(io, "hyperbolic equations", semi.equations |> typeof |> nameof) + summary_line(io, "parabolic equations", + semi.equations_parabolic |> typeof |> nameof) + summary_line(io, "initial condition", semi.initial_condition) + + # print_boundary_conditions(io, semi) + + summary_line(io, "source terms", semi.source_terms) + summary_line(io, "solver", semi.solver |> typeof |> nameof) + summary_line(io, "parabolic solver", semi.solver_parabolic |> typeof |> nameof) + summary_line(io, "total #DOFs", ndofs(semi)) + summary_footer(io) + end end @inline Base.ndims(semi::SemidiscretizationHyperbolicParabolic) = ndims(semi.mesh) -@inline nvariables(semi::SemidiscretizationHyperbolicParabolic) = nvariables(semi.equations) +@inline function 
nvariables(semi::SemidiscretizationHyperbolicParabolic) + nvariables(semi.equations) +end @inline Base.real(semi::SemidiscretizationHyperbolicParabolic) = real(semi.solver) # retain dispatch on hyperbolic equations only @inline function mesh_equations_solver_cache(semi::SemidiscretizationHyperbolicParabolic) - @unpack mesh, equations, solver, cache = semi - return mesh, equations, solver, cache + @unpack mesh, equations, solver, cache = semi + return mesh, equations, solver, cache end +function calc_error_norms(func, u_ode, t, analyzer, + semi::SemidiscretizationHyperbolicParabolic, cache_analysis) + @unpack mesh, equations, initial_condition, solver, cache = semi + u = wrap_array(u_ode, mesh, equations, solver, cache) -function calc_error_norms(func, u_ode, t, analyzer, semi::SemidiscretizationHyperbolicParabolic, cache_analysis) - @unpack mesh, equations, initial_condition, solver, cache = semi - u = wrap_array(u_ode, mesh, equations, solver, cache) - - calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver, cache, cache_analysis) + calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver, + cache, cache_analysis) end - function compute_coefficients(t, semi::SemidiscretizationHyperbolicParabolic) - # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl` - compute_coefficients(semi.initial_condition, t, semi) + # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl` + compute_coefficients(semi.initial_condition, t, semi) end function compute_coefficients!(u_ode, t, semi::SemidiscretizationHyperbolicParabolic) - compute_coefficients!(u_ode, semi.initial_condition, t, semi) + compute_coefficients!(u_ode, semi.initial_condition, t, semi) end """ @@ -222,49 +275,51 @@ will be used by default by the implicit part of IMEX methods from the SciML ecosystem. """ function semidiscretize(semi::SemidiscretizationHyperbolicParabolic, tspan) - u0_ode = compute_coefficients(first(tspan), semi) - # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using - # mpi_isparallel() && MPI.Barrier(mpi_comm()) - # See https://github.com/trixi-framework/Trixi.jl/issues/328 - iip = true # is-inplace, i.e., we modify a vector when calling rhs_parabolic!, rhs! - # Note that the IMEX time integration methods of OrdinaryDiffEq.jl treat the - # first function implicitly and the second one explicitly. Thus, we pass the - # stiffer parabolic function first. - return SplitODEProblem{iip}(rhs_parabolic!, rhs!, u0_ode, tspan, semi) + u0_ode = compute_coefficients(first(tspan), semi) + # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using + # mpi_isparallel() && MPI.Barrier(mpi_comm()) + # See https://github.com/trixi-framework/Trixi.jl/issues/328 + iip = true # is-inplace, i.e., we modify a vector when calling rhs_parabolic!, rhs! + # Note that the IMEX time integration methods of OrdinaryDiffEq.jl treat the + # first function implicitly and the second one explicitly. Thus, we pass the + # stiffer parabolic function first. 
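    # For illustration, a sketch of downstream usage; `KenCarp4` is one example
    # of a SplitODEProblem-compatible IMEX method from OrdinaryDiffEq.jl:
    #
    #     ode = semidiscretize(semi, (0.0, 1.0))
    #     sol = solve(ode, KenCarp4(); save_everystep = false)
    #
    # integrates `rhs_parabolic!` implicitly and `rhs!` explicitly.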
+ return SplitODEProblem{iip}(rhs_parabolic!, rhs!, u0_ode, tspan, semi) end function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) - @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi + @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi - u = wrap_array(u_ode, mesh, equations, solver, cache) - du = wrap_array(du_ode, mesh, equations, solver, cache) + u = wrap_array(u_ode, mesh, equations, solver, cache) + du = wrap_array(du_ode, mesh, equations, solver, cache) - # TODO: Taal decide, do we need to pass the mesh? - time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition, - boundary_conditions, source_terms, solver, cache) - runtime = time_ns() - time_start - put!(semi.performance_counter.counters[1], runtime) + # TODO: Taal decide, do we need to pass the mesh? + time_start = time_ns() + @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition, + boundary_conditions, source_terms, solver, cache) + runtime = time_ns() - time_start + put!(semi.performance_counter.counters[1], runtime) - return nothing + return nothing end function rhs_parabolic!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) - @unpack mesh, equations_parabolic, initial_condition, boundary_conditions_parabolic, source_terms, solver, solver_parabolic, cache, cache_parabolic = semi - - u = wrap_array(u_ode, mesh, equations_parabolic, solver, cache_parabolic) - du = wrap_array(du_ode, mesh, equations_parabolic, solver, cache_parabolic) - - # TODO: Taal decide, do we need to pass the mesh? - time_start = time_ns() - @trixi_timeit timer() "parabolic rhs!" rhs_parabolic!(du, u, t, mesh, equations_parabolic, initial_condition, - boundary_conditions_parabolic, source_terms, - solver, solver_parabolic, cache, cache_parabolic) - runtime = time_ns() - time_start - put!(semi.performance_counter.counters[2], runtime) - - return nothing + @unpack mesh, equations_parabolic, initial_condition, boundary_conditions_parabolic, source_terms, solver, solver_parabolic, cache, cache_parabolic = semi + + u = wrap_array(u_ode, mesh, equations_parabolic, solver, cache_parabolic) + du = wrap_array(du_ode, mesh, equations_parabolic, solver, cache_parabolic) + + # TODO: Taal decide, do we need to pass the mesh? + time_start = time_ns() + @trixi_timeit timer() "parabolic rhs!" rhs_parabolic!(du, u, t, mesh, + equations_parabolic, + initial_condition, + boundary_conditions_parabolic, + source_terms, + solver, solver_parabolic, + cache, cache_parabolic) + runtime = time_ns() - time_start + put!(semi.performance_counter.counters[2], runtime) + + return nothing end - - end # @muladd diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index fc6420791bb..838fa2d5819 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -3,13 +3,14 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent abstract type AbstractVolumeIntegral end -get_element_variables!(element_variables, u, mesh, equations, - volume_integral::AbstractVolumeIntegral, dg, cache) = nothing - +function get_element_variables!(element_variables, u, mesh, equations, + volume_integral::AbstractVolumeIntegral, dg, cache) + nothing +end """ VolumeIntegralStrongForm() @@ -18,7 +19,6 @@ The classical strong form volume integral type for FD/DG methods. 
""" struct VolumeIntegralStrongForm <: AbstractVolumeIntegral end - """ VolumeIntegralWeakForm() @@ -40,7 +40,6 @@ struct VolumeIntegralWeakForm <: AbstractVolumeIntegral end create_cache(mesh, equations, ::VolumeIntegralWeakForm, dg, uEltype) = NamedTuple() - """ VolumeIntegralFluxDifferencing(volume_flux) @@ -67,23 +66,22 @@ the interface of numerical fluxes in Trixi.jl. [doi: 10.1016/j.jcp.2017.05.025](https://doi.org/10.1016/j.jcp.2017.05.025) """ struct VolumeIntegralFluxDifferencing{VolumeFlux} <: AbstractVolumeIntegral - volume_flux::VolumeFlux + volume_flux::VolumeFlux end function Base.show(io::IO, ::MIME"text/plain", integral::VolumeIntegralFluxDifferencing) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "volume flux" => integral.volume_flux - ] - summary_box(io, "VolumeIntegralFluxDifferencing", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "volume flux" => integral.volume_flux, + ] + summary_box(io, "VolumeIntegralFluxDifferencing", setup) + end end - """ VolumeIntegralShockCapturingHG(indicator; volume_flux_dg=flux_central, volume_flux_fv=flux_lax_friedrichs) @@ -100,41 +98,45 @@ The amount of blending is determined by the `indicator`, e.g., "A provably entropy stable subcell shock capturing approach for high order split form DG" [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -struct VolumeIntegralShockCapturingHG{VolumeFluxDG, VolumeFluxFV, Indicator} <: AbstractVolumeIntegral - volume_flux_dg::VolumeFluxDG # symmetric, e.g. split-form or entropy-conservative - volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative - indicator::Indicator -end - -function VolumeIntegralShockCapturingHG(indicator; volume_flux_dg=flux_central, - volume_flux_fv=flux_lax_friedrichs) - VolumeIntegralShockCapturingHG{typeof(volume_flux_dg), typeof(volume_flux_fv), typeof(indicator)}( - volume_flux_dg, volume_flux_fv, indicator) -end - -function Base.show(io::IO, mime::MIME"text/plain", integral::VolumeIntegralShockCapturingHG) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - summary_header(io, "VolumeIntegralShockCapturingHG") - summary_line(io, "volume flux DG", integral.volume_flux_dg) - summary_line(io, "volume flux FV", integral.volume_flux_fv) - summary_line(io, "indicator", integral.indicator |> typeof |> nameof) - show(increment_indent(io), mime, integral.indicator) - summary_footer(io) - end +struct VolumeIntegralShockCapturingHG{VolumeFluxDG, VolumeFluxFV, Indicator} <: + AbstractVolumeIntegral + volume_flux_dg::VolumeFluxDG # symmetric, e.g. split-form or entropy-conservative + volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. 
entropy-dissipative + indicator::Indicator +end + +function VolumeIntegralShockCapturingHG(indicator; volume_flux_dg = flux_central, + volume_flux_fv = flux_lax_friedrichs) + VolumeIntegralShockCapturingHG{typeof(volume_flux_dg), typeof(volume_flux_fv), + typeof(indicator)}(volume_flux_dg, volume_flux_fv, + indicator) +end + +function Base.show(io::IO, mime::MIME"text/plain", + integral::VolumeIntegralShockCapturingHG) + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + summary_header(io, "VolumeIntegralShockCapturingHG") + summary_line(io, "volume flux DG", integral.volume_flux_dg) + summary_line(io, "volume flux FV", integral.volume_flux_fv) + summary_line(io, "indicator", integral.indicator |> typeof |> nameof) + show(increment_indent(io), mime, integral.indicator) + summary_footer(io) + end end function get_element_variables!(element_variables, u, mesh, equations, - volume_integral::VolumeIntegralShockCapturingHG, dg, cache) - # call the indicator to get up-to-date values for IO - volume_integral.indicator(u, mesh, equations, dg, cache) - get_element_variables!(element_variables, volume_integral.indicator, volume_integral) + volume_integral::VolumeIntegralShockCapturingHG, dg, + cache) + # call the indicator to get up-to-date values for IO + volume_integral.indicator(u, mesh, equations, dg, cache) + get_element_variables!(element_variables, volume_integral.indicator, + volume_integral) end - """ VolumeIntegralPureLGLFiniteVolume(volume_flux_fv) @@ -154,24 +156,24 @@ mesh (LGL = Legendre-Gauss-Lobatto). [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ struct VolumeIntegralPureLGLFiniteVolume{VolumeFluxFV} <: AbstractVolumeIntegral - volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative + volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative end # TODO: Figure out if this can also be used for Gauss nodes, not just LGL, and adjust the name accordingly -function Base.show(io::IO, ::MIME"text/plain", integral::VolumeIntegralPureLGLFiniteVolume) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "FV flux" => integral.volume_flux_fv - ] - summary_box(io, "VolumeIntegralPureLGLFiniteVolume", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + integral::VolumeIntegralPureLGLFiniteVolume) + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "FV flux" => integral.volume_flux_fv, + ] + summary_box(io, "VolumeIntegralPureLGLFiniteVolume", setup) + end end - # TODO: FD. Should this definition live in a different file because it is # not strictly a DG method? """ @@ -195,23 +197,22 @@ See also [`splitting_steger_warming`](@ref), [`splitting_lax_friedrichs`](@ref), This is an experimental feature and may change in future releases. 
""" struct VolumeIntegralUpwind{FluxSplitting} <: AbstractVolumeIntegral - splitting::FluxSplitting + splitting::FluxSplitting end function Base.show(io::IO, ::MIME"text/plain", integral::VolumeIntegralUpwind) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "flux splitting" => integral.splitting - ] - summary_box(io, "VolumeIntegralUpwind", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "flux splitting" => integral.splitting, + ] + summary_box(io, "VolumeIntegralUpwind", setup) + end end - abstract type AbstractSurfaceIntegral end """ @@ -234,25 +235,24 @@ See also [`VolumeIntegralWeakForm`](@ref). [doi: 10.1007/978-0-387-72067-8](https://doi.org/10.1007/978-0-387-72067-8) """ struct SurfaceIntegralWeakForm{SurfaceFlux} <: AbstractSurfaceIntegral - surface_flux::SurfaceFlux + surface_flux::SurfaceFlux end SurfaceIntegralWeakForm() = SurfaceIntegralWeakForm(flux_central) function Base.show(io::IO, ::MIME"text/plain", integral::SurfaceIntegralWeakForm) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "surface flux" => integral.surface_flux - ] - summary_box(io, "SurfaceIntegralWeakForm", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "surface flux" => integral.surface_flux, + ] + summary_box(io, "SurfaceIntegralWeakForm", setup) + end end - """ SurfaceIntegralStrongForm(surface_flux=flux_central) @@ -261,25 +261,24 @@ The classical strong form surface integral type for FD/DG methods. See also [`VolumeIntegralStrongForm`](@ref). """ struct SurfaceIntegralStrongForm{SurfaceFlux} <: AbstractSurfaceIntegral - surface_flux::SurfaceFlux + surface_flux::SurfaceFlux end SurfaceIntegralStrongForm() = SurfaceIntegralStrongForm(flux_central) function Base.show(io::IO, ::MIME"text/plain", integral::SurfaceIntegralStrongForm) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "surface flux" => integral.surface_flux - ] - summary_box(io, "SurfaceIntegralStrongForm", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "surface flux" => integral.surface_flux, + ] + summary_box(io, "SurfaceIntegralStrongForm", setup) + end end - # TODO: FD. Should this definition live in a different file because it is # not strictly a DG method? """ @@ -295,23 +294,22 @@ See also [`VolumeIntegralUpwind`](@ref). This is an experimental feature and may change in future releases. 
""" struct SurfaceIntegralUpwind{FluxSplitting} <: AbstractSurfaceIntegral - splitting::FluxSplitting + splitting::FluxSplitting end function Base.show(io::IO, ::MIME"text/plain", integral::SurfaceIntegralUpwind) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "flux splitting" => integral.splitting - ] - summary_box(io, "SurfaceIntegralUpwind", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "flux splitting" => integral.splitting, + ] + summary_box(io, "SurfaceIntegralUpwind", setup) + end end - """ DG(; basis, mortar, surface_integral, volume_integral) @@ -320,55 +318,56 @@ If [`basis isa LobattoLegendreBasis`](@ref LobattoLegendreBasis), this creates a [`DGSEM`](@ref). """ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral} - basis::Basis - mortar::Mortar - surface_integral::SurfaceIntegral - volume_integral::VolumeIntegral + basis::Basis + mortar::Mortar + surface_integral::SurfaceIntegral + volume_integral::VolumeIntegral end function Base.show(io::IO, dg::DG) - @nospecialize dg # reduce precompilation time + @nospecialize dg # reduce precompilation time - print(io, "DG{", real(dg), "}(") - print(io, dg.basis) - print(io, ", ", dg.mortar) - print(io, ", ", dg.surface_integral) - print(io, ", ", dg.volume_integral) - print(io, ")") + print(io, "DG{", real(dg), "}(") + print(io, dg.basis) + print(io, ", ", dg.mortar) + print(io, ", ", dg.surface_integral) + print(io, ", ", dg.volume_integral) + print(io, ")") end function Base.show(io::IO, mime::MIME"text/plain", dg::DG) - @nospecialize dg # reduce precompilation time - - if get(io, :compact, false) - show(io, dg) - else - summary_header(io, "DG{" * string(real(dg)) * "}") - summary_line(io, "basis", dg.basis) - summary_line(io, "mortar", dg.mortar) - summary_line(io, "surface integral", dg.surface_integral |> typeof |> nameof) - show(increment_indent(io), mime, dg.surface_integral) - summary_line(io, "volume integral", dg.volume_integral |> typeof |> nameof) - if !(dg.volume_integral isa VolumeIntegralWeakForm) - show(increment_indent(io), mime, dg.volume_integral) + @nospecialize dg # reduce precompilation time + + if get(io, :compact, false) + show(io, dg) + else + summary_header(io, "DG{" * string(real(dg)) * "}") + summary_line(io, "basis", dg.basis) + summary_line(io, "mortar", dg.mortar) + summary_line(io, "surface integral", dg.surface_integral |> typeof |> nameof) + show(increment_indent(io), mime, dg.surface_integral) + summary_line(io, "volume integral", dg.volume_integral |> typeof |> nameof) + if !(dg.volume_integral isa VolumeIntegralWeakForm) + show(increment_indent(io), mime, dg.volume_integral) + end + summary_footer(io) end - summary_footer(io) - end end Base.summary(io::IO, dg::DG) = print(io, "DG(" * summary(dg.basis) * ")") @inline Base.real(dg::DG) = real(dg.basis) - function get_element_variables!(element_variables, u, mesh, equations, dg::DG, cache) - get_element_variables!(element_variables, u, mesh, equations, dg.volume_integral, dg, cache) + get_element_variables!(element_variables, u, mesh, equations, dg.volume_integral, + dg, cache) end - const MeshesDGSEM = Union{TreeMesh, StructuredMesh, UnstructuredMesh2D, P4estMesh} -@inline ndofs(mesh::MeshesDGSEM, dg::DG, cache) = nelements(cache.elements) * nnodes(dg)^ndims(mesh) +@inline function ndofs(mesh::MeshesDGSEM, dg::DG, cache) + nelements(cache.elements) * nnodes(dg)^ndims(mesh) +end 
# TODO: Taal performance, 1:nnodes(dg) vs. Base.OneTo(nnodes(dg)) vs. SOneTo(nnodes(dg)) for DGSEM """ @@ -379,12 +378,14 @@ for the nodes in `dg`. In particular, not the nodes themselves are returned. """ @inline eachnode(dg::DG) = Base.OneTo(nnodes(dg)) -@inline nnodes(dg::DG) = nnodes(dg.basis) +@inline nnodes(dg::DG) = nnodes(dg.basis) # This is used in some more general analysis code and needs to dispatch on the # `mesh` for some combinations of mesh/solver. @inline nelements(mesh, dg::DG, cache) = nelements(dg, cache) -@inline ndofsglobal(mesh, dg::DG, cache) = nelementsglobal(dg, cache) * nnodes(dg)^ndims(mesh) +@inline function ndofsglobal(mesh, dg::DG, cache) + nelementsglobal(dg, cache) * nnodes(dg)^ndims(mesh) +end """ eachelement(dg::DG, cache) @@ -393,7 +394,7 @@ Return an iterator over the indices that specify the location in relevant data s for the elements in `cache`. In particular, not the elements themselves are returned. """ -@inline eachelement(dg::DG, cache) = Base.OneTo(nelements(dg, cache)) +@inline eachelement(dg::DG, cache) = Base.OneTo(nelements(dg, cache)) """ eachinterface(dg::DG, cache) @@ -411,7 +412,7 @@ Return an iterator over the indices that specify the location in relevant data s for the boundaries in `cache`. In particular, not the boundaries themselves are returned. """ -@inline eachboundary(dg::DG, cache) = Base.OneTo(nboundaries(dg, cache)) +@inline eachboundary(dg::DG, cache) = Base.OneTo(nboundaries(dg, cache)) """ eachmortar(dg::DG, cache) @@ -420,7 +421,7 @@ Return an iterator over the indices that specify the location in relevant data s for the mortars in `cache`. In particular, not the mortars themselves are returned. """ -@inline eachmortar(dg::DG, cache) = Base.OneTo(nmortars(dg, cache)) +@inline eachmortar(dg::DG, cache) = Base.OneTo(nmortars(dg, cache)) """ eachmpiinterface(dg::DG, cache) @@ -440,15 +441,16 @@ In particular, not the mortars themselves are returned. """ @inline eachmpimortar(dg::DG, cache) = Base.OneTo(nmpimortars(dg, cache)) -@inline nelements(dg::DG, cache) = nelements(cache.elements) -@inline nelementsglobal(dg::DG, cache) = mpi_isparallel() ? cache.mpi_cache.n_elements_global : nelements(dg, cache) +@inline nelements(dg::DG, cache) = nelements(cache.elements) +@inline function nelementsglobal(dg::DG, cache) + mpi_isparallel() ? cache.mpi_cache.n_elements_global : nelements(dg, cache) +end @inline ninterfaces(dg::DG, cache) = ninterfaces(cache.interfaces) @inline nboundaries(dg::DG, cache) = nboundaries(cache.boundaries) -@inline nmortars(dg::DG, cache) = nmortars(cache.mortars) +@inline nmortars(dg::DG, cache) = nmortars(cache.mortars) @inline nmpiinterfaces(dg::DG, cache) = nmpiinterfaces(cache.mpi_interfaces) @inline nmpimortars(dg::DG, cache) = nmpimortars(cache.mpi_mortars) - # The following functions assume an array-of-structs memory layout # We would like to experiment with different memory layout choices # in the future, see @@ -456,66 +458,64 @@ In particular, not the mortars themselves are returned. # - https://github.com/trixi-framework/Trixi.jl/issues/87 # - https://github.com/trixi-framework/Trixi.jl/issues/86 @inline function get_node_coords(x, equations, solver::DG, indices...) - SVector(ntuple(@inline(idx -> x[idx, indices...]), Val(ndims(equations)))) + SVector(ntuple(@inline(idx->x[idx, indices...]), Val(ndims(equations)))) end @inline function get_node_vars(u, equations, solver::DG, indices...) 
- # There is a cut-off at `n == 10` inside of the method - # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 - # in Julia `v1.5`, leading to type instabilities if - # more than ten variables are used. That's why we use - # `Val(...)` below. - # We use `@inline` to make sure that the `getindex` calls are - # really inlined, which might be the default choice of the Julia - # compiler for standard `Array`s but not necessarily for more - # advanced array types such as `PtrArray`s, cf. - # https://github.com/JuliaSIMD/VectorizationBase.jl/issues/55 - SVector(ntuple(@inline(v -> u[v, indices...]), Val(nvariables(equations)))) + # There is a cut-off at `n == 10` inside of the method + # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 + # in Julia `v1.5`, leading to type instabilities if + # more than ten variables are used. That's why we use + # `Val(...)` below. + # We use `@inline` to make sure that the `getindex` calls are + # really inlined, which might be the default choice of the Julia + # compiler for standard `Array`s but not necessarily for more + # advanced array types such as `PtrArray`s, cf. + # https://github.com/JuliaSIMD/VectorizationBase.jl/issues/55 + SVector(ntuple(@inline(v->u[v, indices...]), Val(nvariables(equations)))) end @inline function get_surface_node_vars(u, equations, solver::DG, indices...) - # There is a cut-off at `n == 10` inside of the method - # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 - # in Julia `v1.5`, leading to type instabilities if - # more than ten variables are used. That's why we use - # `Val(...)` below. - u_ll = SVector(ntuple(@inline(v -> u[1, v, indices...]), Val(nvariables(equations)))) - u_rr = SVector(ntuple(@inline(v -> u[2, v, indices...]), Val(nvariables(equations)))) - return u_ll, u_rr + # There is a cut-off at `n == 10` inside of the method + # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 + # in Julia `v1.5`, leading to type instabilities if + # more than ten variables are used. That's why we use + # `Val(...)` below. + u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(nvariables(equations)))) + u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), Val(nvariables(equations)))) + return u_ll, u_rr end @inline function set_node_vars!(u, u_node, equations, solver::DG, indices...) - for v in eachvariable(equations) - u[v, indices...] = u_node[v] - end - return nothing + for v in eachvariable(equations) + u[v, indices...] = u_node[v] + end + return nothing end @inline function add_to_node_vars!(u, u_node, equations, solver::DG, indices...) - for v in eachvariable(equations) - u[v, indices...] += u_node[v] - end - return nothing + for v in eachvariable(equations) + u[v, indices...] += u_node[v] + end + return nothing end # Use this function instead of `add_to_node_vars` to speed up # multiply-and-add-to-node-vars operations # See https://github.com/trixi-framework/Trixi.jl/pull/643 -@inline function multiply_add_to_node_vars!(u, factor, u_node, equations, solver::DG, indices...) - for v in eachvariable(equations) - u[v, indices...] = u[v, indices...] + factor * u_node[v] - end - return nothing +@inline function multiply_add_to_node_vars!(u, factor, u_node, equations, solver::DG, + indices...) + for v in eachvariable(equations) + u[v, indices...] = u[v, indices...] + factor * u_node[v] + end + return nothing end - # Used for analyze_solution SolutionAnalyzer(dg::DG; kwargs...) = SolutionAnalyzer(dg.basis; kwargs...) 
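The node-vars helpers above compose naturally into elementwise kernels. A
minimal sketch for the 1D index layout, assuming a semidiscretization's
`equations`, `dg`, and `cache` are available; `scale_solution!` is a
hypothetical name, not part of the Trixi.jl API:

# Multiply every nodal value of every variable by `factor`
function scale_solution!(u, factor, equations, dg::DG, cache)
    for element in eachelement(dg, cache), i in eachnode(dg)
        u_node = get_node_vars(u, equations, dg, i, element)  # SVector of the variables
        set_node_vars!(u, factor * u_node, equations, dg, i, element)
    end
    return nothing
end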
AdaptorAMR(mesh, dg::DG) = AdaptorL2(dg.basis) - - # General structs for discretizations based on the basic principle of # DGSEM (discontinuous Galerkin spectral element method) include("dgsem/dgsem.jl") @@ -526,125 +526,138 @@ include("dgsem/dgsem.jl") # functionality implemented for DGSEM. include("fdsbp_tree/fdsbp.jl") - - function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache) - # We must allocate a `Vector` in order to be able to `resize!` it (AMR). - # cf. wrap_array - zeros(eltype(cache.elements), nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) -end - -@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::DGSEM, cache) - @boundscheck begin - @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) - end - # We would like to use - # reshape(u_ode, (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) - # but that results in - # ERROR: LoadError: cannot resize array with shared data - # when we resize! `u_ode` during AMR. - # - # !!! danger "Segfaults" - # Remember to `GC.@preserve` temporaries such as copies of `u_ode` - # and other stuff that is only used indirectly via `wrap_array` afterwards! - - # Currently, there are problems when AD is used with `PtrArray`s in broadcasts - # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use - # optimized `PtrArray`s whenever possible and fall back to plain `Array`s - # otherwise. - if LoopVectorization.check_args(u_ode) - # This version using `PtrArray`s from StrideArrays.jl is very fast and - # does not result in allocations. + # We must allocate a `Vector` in order to be able to `resize!` it (AMR). + # cf. wrap_array + zeros(eltype(cache.elements), + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) +end + +@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, + dg::DGSEM, cache) + @boundscheck begin + @assert length(u_ode) == + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) + end + # We would like to use + # reshape(u_ode, (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + # but that results in + # ERROR: LoadError: cannot resize array with shared data + # when we resize! `u_ode` during AMR. # - # !!! danger "Heisenbug" - # Do not use this code when `@threaded` uses `Threads.@threads`. There is - # a very strange Heisenbug that makes some parts very slow *sometimes*. - # In fact, everything can be fast and fine for many cases but some parts - # of the RHS evaluation can take *exactly* (!) five seconds randomly... - # Hence, this version should only be used when `@threaded` is based on - # `@batch` from Polyester.jl or something similar. Using Polyester.jl - # is probably the best option since everything will be handed over to - # Chris Elrod, one of the best performance software engineers for Julia. - PtrArray(pointer(u_ode), - (StaticInt(nvariables(equations)), ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., nelements(dg, cache))) - # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) - else - # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. - unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode), - (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) - end + # !!! 
danger "Segfaults" + # Remember to `GC.@preserve` temporaries such as copies of `u_ode` + # and other stuff that is only used indirectly via `wrap_array` afterwards! + + # Currently, there are problems when AD is used with `PtrArray`s in broadcasts + # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use + # optimized `PtrArray`s whenever possible and fall back to plain `Array`s + # otherwise. + if LoopVectorization.check_args(u_ode) + # This version using `PtrArray`s from StrideArrays.jl is very fast and + # does not result in allocations. + # + # !!! danger "Heisenbug" + # Do not use this code when `@threaded` uses `Threads.@threads`. There is + # a very strange Heisenbug that makes some parts very slow *sometimes*. + # In fact, everything can be fast and fine for many cases but some parts + # of the RHS evaluation can take *exactly* (!) five seconds randomly... + # Hence, this version should only be used when `@threaded` is based on + # `@batch` from Polyester.jl or something similar. Using Polyester.jl + # is probably the best option since everything will be handed over to + # Chris Elrod, one of the best performance software engineers for Julia. + PtrArray(pointer(u_ode), + (StaticInt(nvariables(equations)), + ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + nelements(dg, cache))) + # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + else + # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. + unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., + nelements(dg, cache))) + end end # Finite difference summation by parts (FDSBP) methods -@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::FDSBP, cache) - @boundscheck begin - @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) - end - # See comments on the DGSEM version above - if LoopVectorization.check_args(u_ode) - # Here, we do not specialize on the number of nodes using `StaticInt` since - # - it will not be type stable (SBP operators just store it as a runtime value) - # - FD methods tend to use high node counts - PtrArray(pointer(u_ode), - (StaticInt(nvariables(equations)), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) - else - # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. - unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode), - (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) - end +@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, + dg::FDSBP, cache) + @boundscheck begin + @assert length(u_ode) == + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) + end + # See comments on the DGSEM version above + if LoopVectorization.check_args(u_ode) + # Here, we do not specialize on the number of nodes using `StaticInt` since + # - it will not be type stable (SBP operators just store it as a runtime value) + # - FD methods tend to use high node counts + PtrArray(pointer(u_ode), + (StaticInt(nvariables(equations)), + ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + else + # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. 
+ unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., + nelements(dg, cache))) + end end # General fallback -@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::DG, cache) - wrap_array_native(u_ode, mesh, equations, dg, cache) +@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, + dg::DG, cache) + wrap_array_native(u_ode, mesh, equations, dg, cache) end # Like `wrap_array`, but guarantees to return a plain `Array`, which can be better # for interfacing with external C libraries (MPI, HDF5, visualization), # writing solution files etc. -@inline function wrap_array_native(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::DG, cache) - @boundscheck begin - @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) - end - unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode), - (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) -end - - -function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, element) +@inline function wrap_array_native(u_ode::AbstractVector, mesh::AbstractMesh, equations, + dg::DG, cache) + @boundscheck begin + @assert length(u_ode) == + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) + end + unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., + nelements(dg, cache))) +end + +function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg::DG, + cache) + @threaded for element in eachelement(dg, cache) + for i in eachnode(dg) + x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + element) + u_node = func(x_node, t, equations) + set_node_vars!(u, u_node, equations, dg, i, element) + end end - end end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, element) +function compute_coefficients!(u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, + cache) + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + j, element) + u_node = func(x_node, t, equations) + set_node_vars!(u, u_node, equations, dg, i, j, element) + end end - end end -function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, k, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, k, element) +function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg::DG, + cache) + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in 
eachnode(dg), i in eachnode(dg) + x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + j, k, element) + u_node = func(x_node, t, equations) + set_node_vars!(u, u_node, equations, dg, i, j, k, element) + end end - end end - # Discretizations specific to each mesh type of Trixi.jl # If some functionality is shared by multiple combinations of meshes/solvers, # it is defined in the directory of the most basic mesh and solver type. @@ -657,6 +670,4 @@ include("dgsem_tree/dg.jl") include("dgsem_structured/dg.jl") include("dgsem_unstructured/dg.jl") include("dgsem_p4est/dg.jl") - - end # @muladd diff --git a/src/solvers/dgmulti.jl b/src/solvers/dgmulti.jl index 8ff27db0cd9..363d91b5a4c 100644 --- a/src/solvers/dgmulti.jl +++ b/src/solvers/dgmulti.jl @@ -14,4 +14,4 @@ include("dgmulti/flux_differencing_compressible_euler.jl") include("dgmulti/shock_capturing.jl") # parabolic terms for DGMulti solvers -include("dgmulti/dg_parabolic.jl") \ No newline at end of file +include("dgmulti/dg_parabolic.jl") diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index c9b7f5f021d..d51c7cabf9d 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -3,17 +3,21 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # out <- A*x mul_by!(A) = @inline (out, x) -> matmul!(out, A, x) mul_by!(A::T) where {T <: SimpleKronecker} = @inline (out, x) -> mul!(out, A, x) mul_by!(A::AbstractSparseMatrix) = @inline (out, x) -> mul!(out, A, x) -mul_by!(A::LinearAlgebra.AdjOrTrans{T, S}) where {T, S<:AbstractSparseMatrix} = @inline (out, x) -> mul!(out, A, x) +function mul_by!(A::LinearAlgebra.AdjOrTrans{T, S}) where {T, S <: AbstractSparseMatrix} + @inline (out, x) -> mul!(out, A, x) +end # out <- out + α * A * x mul_by_accum!(A, α) = @inline (out, x) -> matmul!(out, A, x, α, One()) -mul_by_accum!(A::AbstractSparseMatrix, α) = @inline (out, x) -> mul!(out, A, x, α, One()) +function mul_by_accum!(A::AbstractSparseMatrix, α) + @inline (out, x) -> mul!(out, A, x, α, One()) +end # out <- out + A * x mul_by_accum!(A) = mul_by_accum!(A, One()) @@ -25,15 +29,17 @@ mul_by!(A::UniformScaling) = MulByUniformScaling() mul_by_accum!(A::UniformScaling) = MulByAccumUniformScaling() # StructArray fallback -@inline apply_to_each_field(f::F, args::Vararg{Any, N}) where {F, N} = StructArrays.foreachfield(f, args...) +@inline function apply_to_each_field(f::F, args::Vararg{Any, N}) where {F, N} + StructArrays.foreachfield(f, args...) +end # specialize for UniformScaling types: works for either StructArray{SVector} or Matrix{SVector} # solution storage formats. @inline apply_to_each_field(f::MulByUniformScaling, out, x, args...) = copy!(out, x) @inline function apply_to_each_field(f::MulByAccumUniformScaling, out, x, args...) - @threaded for i in eachindex(x) - out[i] = out[i] + x[i] - end + @threaded for i in eachindex(x) + out[i] = out[i] + x[i] + end end """ @@ -46,7 +52,9 @@ In particular, not the dimensions themselves are returned. @inline eachdim(mesh) = Base.OneTo(ndims(mesh)) # iteration over all elements in a mesh -@inline ndofs(mesh::DGMultiMesh, dg::DGMulti, other_args...) = dg.basis.Np * mesh.md.num_elements +@inline function ndofs(mesh::DGMultiMesh, dg::DGMulti, other_args...) + dg.basis.Np * mesh.md.num_elements +end """ eachelement(mesh::DGMultiMesh, dg::DGMulti, other_args...) 
@@ -54,7 +62,9 @@ Return an iterator over the indices that specify the location in relevant data s for the elements in `mesh`. In particular, not the elements themselves are returned. """ -@inline eachelement(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(mesh.md.num_elements) +@inline function eachelement(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(mesh.md.num_elements) +end # iteration over quantities in a single element @inline nnodes(basis::RefElemData) = basis.Np @@ -66,7 +76,9 @@ Return an iterator over the indices that specify the location in relevant data s for the face nodes in `dg`. In particular, not the face_nodes themselves are returned. """ -@inline each_face_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nfq) +@inline function each_face_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(dg.basis.Nfq) +end """ each_quad_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) @@ -75,7 +87,9 @@ Return an iterator over the indices that specify the location in relevant data s for the quadrature nodes in `dg`. In particular, not the quadrature nodes themselves are returned. """ -@inline each_quad_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nq) +@inline function each_quad_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(dg.basis.Nq) +end # iteration over quantities over the entire mesh (dofs, quad nodes, face nodes). """ @@ -85,8 +99,9 @@ Return an iterator over the indices that specify the location in relevant data s for the degrees of freedom (DOF) in `dg`. In particular, not the DOFs themselves are returned. """ -@inline each_dof_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(ndofs(mesh, dg, other_args...)) - +@inline function each_dof_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(ndofs(mesh, dg, other_args...)) +end """ each_quad_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) @@ -95,7 +110,9 @@ Return an iterator over the indices that specify the location in relevant data s for the global quadrature nodes in `mesh`. In particular, not the quadrature nodes themselves are returned. """ -@inline each_quad_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nq * mesh.md.num_elements) +@inline function each_quad_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(dg.basis.Nq * mesh.md.num_elements) +end """ each_face_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) @@ -104,156 +121,166 @@ Return an iterator over the indices that specify the location in relevant data s for the face nodes in `mesh`. In particular, not the face nodes themselves are returned. """ -@inline each_face_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nfq * mesh.md.num_elements) +@inline function each_face_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) 
+ Base.OneTo(dg.basis.Nfq * mesh.md.num_elements) +end # interface with semidiscretization_hyperbolic wrap_array(u_ode, mesh::DGMultiMesh, equations, dg::DGMulti, cache) = u_ode wrap_array_native(u_ode, mesh::DGMultiMesh, equations, dg::DGMulti, cache) = u_ode -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, mesh::DGMultiMesh, - dg::DGMulti, cache) where {Keys, ValueTypes<:NTuple{N, Any}} where {N} - return boundary_conditions +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::DGMultiMesh, + dg::DGMulti, + cache) where {Keys, ValueTypes <: NTuple{N, Any} + } where {N} + return boundary_conditions end # Allocate nested array type for DGMulti solution storage. function allocate_nested_array(uEltype, nvars, array_dimensions, dg) - # store components as separate arrays, combine via StructArrays - return StructArray{SVector{nvars, uEltype}}(ntuple(_->zeros(uEltype, array_dimensions...), nvars)) + # store components as separate arrays, combine via StructArrays + return StructArray{SVector{nvars, uEltype}}(ntuple(_ -> zeros(uEltype, + array_dimensions...), + nvars)) end function reset_du!(du, dg::DGMulti, other_args...) - @threaded for i in eachindex(du) - du[i] = zero(eltype(du)) - end + @threaded for i in eachindex(du) + du[i] = zero(eltype(du)) + end - return du + return du end # Constructs cache variables for both affine and non-affine (curved) DGMultiMeshes -function create_cache(mesh::DGMultiMesh{NDIMS}, equations, dg::DGMultiWeakForm, RealT, uEltype) where {NDIMS} - rd = dg.basis - md = mesh.md - - # volume quadrature weights, volume interpolation matrix, mass matrix, differentiation matrices - @unpack wq, Vq, M, Drst = rd - - # ∫f(u) * dv/dx_i = ∑_j (Vq*Drst[i])'*diagm(wq)*(rstxyzJ[i,j].*f(Vq*u)) - weak_differentiation_matrices = map(D -> -M \ ((Vq * D)' * Diagonal(wq)), Drst) - - nvars = nvariables(equations) - - # storage for volume quadrature values, face quadrature values, flux values - u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) - u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - if typeof(rd.approximation_type) <: Union{SBP, AbstractNonperiodicDerivativeOperator} - lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators - else - lift_scalings = nothing - end +function create_cache(mesh::DGMultiMesh{NDIMS}, equations, dg::DGMultiWeakForm, RealT, + uEltype) where {NDIMS} + rd = dg.basis + md = mesh.md + + # volume quadrature weights, volume interpolation matrix, mass matrix, differentiation matrices + @unpack wq, Vq, M, Drst = rd + + # ∫f(u) * dv/dx_i = ∑_j (Vq*Drst[i])'*diagm(wq)*(rstxyzJ[i,j].*f(Vq*u)) + weak_differentiation_matrices = map(D -> -M \ ((Vq * D)' * Diagonal(wq)), Drst) + + nvars = nvariables(equations) + + # storage for volume quadrature values, face quadrature values, flux values + u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) + u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + if typeof(rd.approximation_type) <: + Union{SBP, AbstractNonperiodicDerivativeOperator} + lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators + else + lift_scalings = nothing + end - # local storage for volume integral and source computations - local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), 
dg) for _ in 1:Threads.nthreads()] + # local storage for volume integral and source computations + local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] - # For curved meshes, we interpolate geometric terms from nodal points to quadrature points. - # For affine meshes, we just access one element of this interpolated data. - dxidxhatj = map(x -> rd.Vq * x, md.rstxyzJ) + # For curved meshes, we interpolate geometric terms from nodal points to quadrature points. + # For affine meshes, we just access one element of this interpolated data. + dxidxhatj = map(x -> rd.Vq * x, md.rstxyzJ) - # interpolate J to quadrature points for weight-adjusted DG (WADG) - invJ = inv.(rd.Vq * md.J) + # interpolate J to quadrature points for weight-adjusted DG (WADG) + invJ = inv.(rd.Vq * md.J) - # for scaling by curved geometric terms (not used by affine DGMultiMesh) - flux_threaded = - [[allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:NDIMS] for _ in 1:Threads.nthreads()] - rotated_flux_threaded = - [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] + # for scaling by curved geometric terms (not used by affine DGMultiMesh) + flux_threaded = [[allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:NDIMS] for _ in 1:Threads.nthreads()] + rotated_flux_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] - return (; md, weak_differentiation_matrices, lift_scalings, invJ, dxidxhatj, + return (; md, weak_differentiation_matrices, lift_scalings, invJ, dxidxhatj, u_values, u_face_values, flux_face_values, local_values_threaded, flux_threaded, rotated_flux_threaded) end function allocate_coefficients(mesh::DGMultiMesh, equations, dg::DGMulti, cache) - return allocate_nested_array(real(dg), nvariables(equations), size(mesh.md.x), dg) + return allocate_nested_array(real(dg), nvariables(equations), size(mesh.md.x), dg) end function compute_coefficients!(u, initial_condition, t, mesh::DGMultiMesh, equations, dg::DGMulti, cache) - md = mesh.md - rd = dg.basis - @unpack u_values = cache - - # evaluate the initial condition at quadrature points - @threaded for i in each_quad_node_global(mesh, dg, cache) - u_values[i] = initial_condition(SVector(getindex.(md.xyzq, i)), - t, equations) - end + md = mesh.md + rd = dg.basis + @unpack u_values = cache + + # evaluate the initial condition at quadrature points + @threaded for i in each_quad_node_global(mesh, dg, cache) + u_values[i] = initial_condition(SVector(getindex.(md.xyzq, i)), + t, equations) + end - # multiplying by Pq computes the L2 projection - apply_to_each_field(mul_by!(rd.Pq), u, u_values) + # multiplying by Pq computes the L2 projection + apply_to_each_field(mul_by!(rd.Pq), u, u_values) end # estimates the timestep based on polynomial degree and mesh. Does not account for physics (e.g., # computes an estimate of `dt` based on the advection equation with constant unit advection speed). 
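# Schematically (a sketch of the estimate below, not a sharp CFL bound):
#
#     dt ≈ h / C(rd)
#
# with `h` from `StartUpDG.estimate_h` and `C(rd)` the inverse trace constant
# of the reference element.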
function estimate_dt(mesh::DGMultiMesh, dg::DGMulti) - rd = dg.basis # RefElemData - return StartUpDG.estimate_h(rd, mesh.md) / StartUpDG.inverse_trace_constant(rd) + rd = dg.basis # RefElemData + return StartUpDG.estimate_h(rd, mesh.md) / StartUpDG.inverse_trace_constant(rd) end # for the stepsize callback function max_dt(u, t, mesh::DGMultiMesh, - constant_speed::False, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} - - @unpack md = mesh - rd = dg.basis - - dt_min = Inf - for e in eachelement(mesh, dg, cache) - h_e = StartUpDG.estimate_h(e, rd, md) - max_speeds = ntuple(_->nextfloat(zero(t)), NDIMS) - for i in Base.OneTo(rd.Np) # loop over nodes - lambda_i = max_abs_speeds(u[i, e], equations) - max_speeds = max.(max_speeds, lambda_i) + constant_speed::False, equations, dg::DGMulti{NDIMS}, + cache) where {NDIMS} + @unpack md = mesh + rd = dg.basis + + dt_min = Inf + for e in eachelement(mesh, dg, cache) + h_e = StartUpDG.estimate_h(e, rd, md) + max_speeds = ntuple(_ -> nextfloat(zero(t)), NDIMS) + for i in Base.OneTo(rd.Np) # loop over nodes + lambda_i = max_abs_speeds(u[i, e], equations) + max_speeds = max.(max_speeds, lambda_i) + end + dt_min = min(dt_min, h_e / sum(max_speeds)) end - dt_min = min(dt_min, h_e / sum(max_speeds)) - end - # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by - # `polydeg+1`. This is because `nnodes(dg)` returns the total number of - # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns - # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return 2 * dt_min / (polydeg + 1) + # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by + # `polydeg+1`. This is because `nnodes(dg)` returns the total number of + # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns + # the number of 1D nodes for `DGSEM` solvers. + polydeg = rd.N + return 2 * dt_min / (polydeg + 1) end function max_dt(u, t, mesh::DGMultiMesh, - constant_speed::True, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} - - @unpack md = mesh - rd = dg.basis - - dt_min = Inf - for e in eachelement(mesh, dg, cache) - h_e = StartUpDG.estimate_h(e, rd, md) - max_speeds = ntuple(_->nextfloat(zero(t)), NDIMS) - for i in Base.OneTo(rd.Np) # loop over nodes - max_speeds = max.(max_abs_speeds(equations), max_speeds) + constant_speed::True, equations, dg::DGMulti{NDIMS}, + cache) where {NDIMS} + @unpack md = mesh + rd = dg.basis + + dt_min = Inf + for e in eachelement(mesh, dg, cache) + h_e = StartUpDG.estimate_h(e, rd, md) + max_speeds = ntuple(_ -> nextfloat(zero(t)), NDIMS) + for i in Base.OneTo(rd.Np) # loop over nodes + max_speeds = max.(max_abs_speeds(equations), max_speeds) + end + dt_min = min(dt_min, h_e / sum(max_speeds)) end - dt_min = min(dt_min, h_e / sum(max_speeds)) - end - # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by - # `polydeg+1`. This is because `nnodes(dg)` returns the total number of - # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns - # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return 2 * dt_min / (polydeg + 1) + # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by + # `polydeg+1`. This is because `nnodes(dg)` returns the total number of + # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns + # the number of 1D nodes for `DGSEM` solvers. 
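    # For example (illustrative numbers only): `polydeg = 3` and `dt_min = 0.1`
    # yield `2 * 0.1 / (3 + 1) = 0.05`.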
+ polydeg = rd.N + return 2 * dt_min / (polydeg + 1) end # interpolates from solution coefficients to face quadrature points # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::DGMultiMesh, equations, surface_integral, dg::DGMulti) - rd = dg.basis - @unpack u_face_values = cache - apply_to_each_field(mul_by!(rd.Vf), u_face_values, u) + rd = dg.basis + @unpack u_face_values = cache + apply_to_each_field(mul_by!(rd.Vf), u_face_values, u) end # version for affine meshes @@ -261,26 +288,25 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralWeakForm, dg::DGMulti, cache) - - rd = dg.basis - md = mesh.md - @unpack weak_differentiation_matrices, dxidxhatj, u_values, local_values_threaded = cache - @unpack rstxyzJ = md # geometric terms - - # interpolate to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - @threaded for e in eachelement(mesh, dg, cache) - - flux_values = local_values_threaded[Threads.threadid()] - for i in eachdim(mesh) - flux_values .= flux.(view(u_values, :, e), i, equations) - for j in eachdim(mesh) - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], dxidxhatj[i, j][1, e]), - view(du, :, e), flux_values) - end + rd = dg.basis + md = mesh.md + @unpack weak_differentiation_matrices, dxidxhatj, u_values, local_values_threaded = cache + @unpack rstxyzJ = md # geometric terms + + # interpolate to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + @threaded for e in eachelement(mesh, dg, cache) + flux_values = local_values_threaded[Threads.threadid()] + for i in eachdim(mesh) + flux_values .= flux.(view(u_values, :, e), i, equations) + for j in eachdim(mesh) + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], + dxidxhatj[i, j][1, e]), + view(du, :, e), flux_values) + end + end end - end end # version for curved meshes @@ -288,103 +314,101 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh{NDIMS, <:NonAffine}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralWeakForm, dg::DGMulti, cache) where {NDIMS} + rd = dg.basis + (; weak_differentiation_matrices, u_values) = cache + (; dxidxhatj) = cache - rd = dg.basis - (; weak_differentiation_matrices, u_values) = cache - (; dxidxhatj) = cache - - # interpolate to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - @threaded for e in eachelement(mesh, dg, cache) - - flux_values = cache.flux_threaded[Threads.threadid()] - for i in eachdim(mesh) - flux_values[i] .= flux.(view(u_values, :, e), i, equations) - end - - # rotate flux with df_i/dx_i = sum_j d(x_i)/d(x̂_j) * d(f_i)/d(x̂_j). - # Example: df_x/dx + df_y/dy = dr/dx * df_x/dr + ds/dx * df_x/ds - # + dr/dy * df_y/dr + ds/dy * df_y/ds - # = Dr * (dr/dx * fx + dr/dy * fy) + Ds * (...) 
- # = Dr * (f_r) + Ds * (f_s) - - rotated_flux_values = cache.rotated_flux_threaded[Threads.threadid()] - for j in eachdim(mesh) - - fill!(rotated_flux_values, zero(eltype(rotated_flux_values))) + # interpolate to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) - # compute rotated fluxes - for i in eachdim(mesh) - for ii in eachindex(rotated_flux_values) - flux_i_node = flux_values[i][ii] - dxidxhatj_node = dxidxhatj[i, j][ii, e] - rotated_flux_values[ii] = rotated_flux_values[ii] + dxidxhatj_node * flux_i_node + @threaded for e in eachelement(mesh, dg, cache) + flux_values = cache.flux_threaded[Threads.threadid()] + for i in eachdim(mesh) + flux_values[i] .= flux.(view(u_values, :, e), i, equations) end - end - # apply weak differentiation matrices to rotated fluxes - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j]), - view(du, :, e), rotated_flux_values) + # rotate flux with df_i/dx_i = sum_j d(x_i)/d(x̂_j) * d(f_i)/d(x̂_j). + # Example: df_x/dx + df_y/dy = dr/dx * df_x/dr + ds/dx * df_x/ds + # + dr/dy * df_y/dr + ds/dy * df_y/ds + # = Dr * (dr/dx * fx + dr/dy * fy) + Ds * (...) + # = Dr * (f_r) + Ds * (f_s) + + rotated_flux_values = cache.rotated_flux_threaded[Threads.threadid()] + for j in eachdim(mesh) + fill!(rotated_flux_values, zero(eltype(rotated_flux_values))) + + # compute rotated fluxes + for i in eachdim(mesh) + for ii in eachindex(rotated_flux_values) + flux_i_node = flux_values[i][ii] + dxidxhatj_node = dxidxhatj[i, j][ii, e] + rotated_flux_values[ii] = rotated_flux_values[ii] + + dxidxhatj_node * flux_i_node + end + end + + # apply weak differentiation matrices to rotated fluxes + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j]), + view(du, :, e), rotated_flux_values) + end end - end end function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - @unpack surface_flux = surface_integral - md = mesh.md - @unpack mapM, mapP, nxyzJ, Jf = md - @unpack u_face_values, flux_face_values = cache - - @threaded for face_node_index in each_face_node_global(mesh, dg, cache) - - # inner (idM -> minus) and outer (idP -> plus) indices - idM, idP = mapM[face_node_index], mapP[face_node_index] - uM = u_face_values[idM] - uP = u_face_values[idP] - normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] - flux_face_values[idM] = surface_flux(uM, uP, normal, equations) * Jf[idM] - end + @unpack surface_flux = surface_integral + md = mesh.md + @unpack mapM, mapP, nxyzJ, Jf = md + @unpack u_face_values, flux_face_values = cache + + @threaded for face_node_index in each_face_node_global(mesh, dg, cache) + + # inner (idM -> minus) and outer (idP -> plus) indices + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM = u_face_values[idM] + uP = u_face_values[idP] + normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] + flux_face_values[idM] = surface_flux(uM, uP, normal, equations) * Jf[idM] + end end function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm, mesh::DGMultiMesh, have_nonconservative_terms::True, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - flux_conservative, flux_nonconservative = surface_integral.surface_flux - md = mesh.md - @unpack mapM, mapP, nxyzJ, Jf = md - @unpack u_face_values, flux_face_values = cache - - @threaded for face_node_index in each_face_node_global(mesh, dg, cache) - - # inner (idM -> minus) and outer (idP -> plus) indices - idM, idP = 
mapM[face_node_index], mapP[face_node_index] - uM = u_face_values[idM] - - # compute flux if node is not a boundary node - if idM != idP - uP = u_face_values[idP] - normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] - conservative_part = flux_conservative(uM, uP, normal, equations) - - # Two notes on the use of `flux_nonconservative`: - # 1. In contrast to other mesh types, only one nonconservative part needs to be - # computed since we loop over the elements, not the unique interfaces. - # 2. In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. However, - # both are the same at watertight interfaces, so we pass `normal` twice. - nonconservative_part = flux_nonconservative(uM, uP, normal, normal, equations) - # The factor 0.5 is necessary for the nonconservative fluxes based on the - # interpretation of global SBP operators. - flux_face_values[idM] = (conservative_part + 0.5 * nonconservative_part) * Jf[idM] + flux_conservative, flux_nonconservative = surface_integral.surface_flux + md = mesh.md + @unpack mapM, mapP, nxyzJ, Jf = md + @unpack u_face_values, flux_face_values = cache + + @threaded for face_node_index in each_face_node_global(mesh, dg, cache) + + # inner (idM -> minus) and outer (idP -> plus) indices + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM = u_face_values[idM] + + # compute flux if node is not a boundary node + if idM != idP + uP = u_face_values[idP] + normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] + conservative_part = flux_conservative(uM, uP, normal, equations) + + # Two notes on the use of `flux_nonconservative`: + # 1. In contrast to other mesh types, only one nonconservative part needs to be + # computed since we loop over the elements, not the unique interfaces. + # 2. In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. However, + # both are the same at watertight interfaces, so we pass `normal` twice. + nonconservative_part = flux_nonconservative(uM, uP, normal, normal, + equations) + # The factor 0.5 is necessary for the nonconservative fluxes based on the + # interpretation of global SBP operators. + flux_face_values[idM] = (conservative_part + 0.5 * nonconservative_part) * + Jf[idM] + end end - end end # assumes cache.flux_face_values is computed and filled with @@ -392,22 +416,22 @@ end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMulti, cache) - rd = dg.basis - apply_to_each_field(mul_by_accum!(rd.LIFT), du, cache.flux_face_values) + rd = dg.basis + apply_to_each_field(mul_by_accum!(rd.LIFT), du, cache.flux_face_values) end # Specialize for nodal SBP discretizations. Uses that Vf*u = u[Fmask,:] # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::DGMultiMesh, equations, surface_integral, dg::DGMultiSBP) - rd = dg.basis - @unpack Fmask = rd - @unpack u_face_values = cache - @threaded for e in eachelement(mesh, dg, cache) - for (i,fid) in enumerate(Fmask) - u_face_values[i, e] = u[fid, e] + rd = dg.basis + @unpack Fmask = rd + @unpack u_face_values = cache + @threaded for e in eachelement(mesh, dg, cache) + for (i, fid) in enumerate(Fmask) + u_face_values[i, e] = u[fid, e] + end end - end end # Specialize for nodal SBP discretizations. 
Uses that du = LIFT*u is equivalent to @@ -415,141 +439,152 @@ end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMultiSBP, cache) - rd = dg.basis - @unpack flux_face_values, lift_scalings = cache + rd = dg.basis + @unpack flux_face_values, lift_scalings = cache - @threaded for e in eachelement(mesh, dg, cache) - for i in each_face_node(mesh, dg, cache) - fid = rd.Fmask[i] - du[fid, e] = du[fid, e] + flux_face_values[i,e] * lift_scalings[i] + @threaded for e in eachelement(mesh, dg, cache) + for i in each_face_node(mesh, dg, cache) + fid = rd.Fmask[i] + du[fid, e] = du[fid, e] + flux_face_values[i, e] * lift_scalings[i] + end end - end end # do nothing for periodic (default) boundary conditions -calc_boundary_flux!(cache, t, boundary_conditions::BoundaryConditionPeriodic, - mesh, have_nonconservative_terms, equations, dg::DGMulti) = nothing +function calc_boundary_flux!(cache, t, boundary_conditions::BoundaryConditionPeriodic, + mesh, have_nonconservative_terms, equations, dg::DGMulti) + nothing +end # "lispy tuple programming" instead of for loop for type stability function calc_boundary_flux!(cache, t, boundary_conditions, mesh, have_nonconservative_terms, equations, dg::DGMulti) - # peel off first boundary condition - calc_single_boundary_flux!(cache, t, first(boundary_conditions), first(keys(boundary_conditions)), - mesh, have_nonconservative_terms, equations, dg) + # peel off first boundary condition + calc_single_boundary_flux!(cache, t, first(boundary_conditions), + first(keys(boundary_conditions)), + mesh, have_nonconservative_terms, equations, dg) - # recurse on the remainder of the boundary conditions - calc_boundary_flux!(cache, t, Base.tail(boundary_conditions), - mesh, have_nonconservative_terms, equations, dg) + # recurse on the remainder of the boundary conditions + calc_boundary_flux!(cache, t, Base.tail(boundary_conditions), + mesh, have_nonconservative_terms, equations, dg) end # terminate recursion -calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple{(),Tuple{}}, - mesh, have_nonconservative_terms, equations, dg::DGMulti) = nothing +function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple{(), Tuple{}}, + mesh, have_nonconservative_terms, equations, dg::DGMulti) + nothing +end function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, mesh, - have_nonconservative_terms::False, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - rd = dg.basis - md = mesh.md - @unpack u_face_values, flux_face_values = cache - @unpack xyzf, nxyzJ, Jf = md - @unpack surface_flux = dg.surface_integral - - # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). - # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. - num_faces = StartUpDG.num_faces(rd.element_type) - num_pts_per_face = rd.Nfq ÷ num_faces - num_faces_total = num_faces * md.num_elements - - # This function was originally defined as - # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. - # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. - # To avoid allocations, we use Tim Holy's suggestion: - # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. 
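Editor's note: the comment above refers to the allocation-free `Base.ReshapedArray` workaround. A minimal sketch of why it is useful (array sizes are illustrative):

    u = collect(reshape(1.0:24.0, 4, 6))   # e.g., face values with 24 entries total
    num_pts_per_face, num_faces_total = 2, 12
    u_by_face = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ())
    u_by_face[1, 1] = 0.0                  # writes through: u[1, 1] is now 0.0 as well

Unlike `reshape(view(u, :), ...)`, this construction avoids the allocations described in the linked Julia issue while still sharing memory with the parent array.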
- reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) - - u_face_values = reshape_by_face(u_face_values) - flux_face_values = reshape_by_face(flux_face_values) - Jf = reshape_by_face(Jf) - nxyzJ, xyzf = reshape_by_face.(nxyzJ), reshape_by_face.(xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} - - # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... - for f in mesh.boundary_faces[boundary_key] - for i in Base.OneTo(num_pts_per_face) - face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i,f] - face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) - flux_face_values[i,f] = boundary_condition(u_face_values[i,f], - face_normal, face_coordinates, t, - surface_flux, equations) * Jf[i,f] + have_nonconservative_terms::False, equations, + dg::DGMulti{NDIMS}) where {NDIMS} + rd = dg.basis + md = mesh.md + @unpack u_face_values, flux_face_values = cache + @unpack xyzf, nxyzJ, Jf = md + @unpack surface_flux = dg.surface_integral + + # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). + # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. + num_faces = StartUpDG.num_faces(rd.element_type) + num_pts_per_face = rd.Nfq ÷ num_faces + num_faces_total = num_faces * md.num_elements + + # This function was originally defined as + # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. + # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. + # To avoid allocations, we use Tim Holy's suggestion: + # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. + reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) + + u_face_values = reshape_by_face(u_face_values) + flux_face_values = reshape_by_face(flux_face_values) + Jf = reshape_by_face(Jf) + nxyzJ, xyzf = reshape_by_face.(nxyzJ), reshape_by_face.(xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} + + # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... + for f in mesh.boundary_faces[boundary_key] + for i in Base.OneTo(num_pts_per_face) + face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i, f] + face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) + flux_face_values[i, f] = boundary_condition(u_face_values[i, f], + face_normal, face_coordinates, + t, + surface_flux, equations) * + Jf[i, f] + end end - end - # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. - # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. + # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. + # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. end function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, mesh, - have_nonconservative_terms::True, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - rd = dg.basis - md = mesh.md - surface_flux, nonconservative_flux = dg.surface_integral.surface_flux - - # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). - # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. 
- num_pts_per_face = rd.Nfq ÷ StartUpDG.num_faces(rd.element_type) - num_faces_total = StartUpDG.num_faces(rd.element_type) * md.num_elements - - # This function was originally defined as - # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. - # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. - # To avoid allocations, we use Tim Holy's suggestion: - # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. - reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) - - u_face_values = reshape_by_face(cache.u_face_values) - flux_face_values = reshape_by_face(cache.flux_face_values) - Jf = reshape_by_face(md.Jf) - nxyzJ, xyzf = reshape_by_face.(md.nxyzJ), reshape_by_face.(md.xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} - - # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... - for f in mesh.boundary_faces[boundary_key] - for i in Base.OneTo(num_pts_per_face) - face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i,f] - face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) - - # Compute conservative and non-conservative fluxes separately. - # This imposes boundary conditions on the conservative part of the flux. - cons_flux_at_face_node = boundary_condition(u_face_values[i,f], face_normal, face_coordinates, t, - surface_flux, equations) - - # Compute pointwise nonconservative numerical flux at the boundary. - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, there is only one `face_normal` at boundaries, which we pass in twice. - # Note: This does not set any type of boundary condition for the nonconservative term - noncons_flux_at_face_node = nonconservative_flux(u_face_values[i,f], u_face_values[i,f], - face_normal, face_normal, equations) - - flux_face_values[i,f] = (cons_flux_at_face_node + 0.5 * noncons_flux_at_face_node) * Jf[i,f] - + have_nonconservative_terms::True, equations, + dg::DGMulti{NDIMS}) where {NDIMS} + rd = dg.basis + md = mesh.md + surface_flux, nonconservative_flux = dg.surface_integral.surface_flux + + # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). + # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. + num_pts_per_face = rd.Nfq ÷ StartUpDG.num_faces(rd.element_type) + num_faces_total = StartUpDG.num_faces(rd.element_type) * md.num_elements + + # This function was originally defined as + # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. + # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. + # To avoid allocations, we use Tim Holy's suggestion: + # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. + reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) + + u_face_values = reshape_by_face(cache.u_face_values) + flux_face_values = reshape_by_face(cache.flux_face_values) + Jf = reshape_by_face(md.Jf) + nxyzJ, xyzf = reshape_by_face.(md.nxyzJ), reshape_by_face.(md.xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} + + # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... 
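Editor's note: in the loop that follows, the flux assembled at each boundary face node combines the conservative part (which sees the boundary condition) with half of the nonconservative part, rescaled by the face Jacobian. A toy version with made-up numbers:

    cons_flux_at_face_node    = 1.2   # boundary condition applied to the conservative flux (made up)
    noncons_flux_at_face_node = 0.4   # nonconservative flux, same state on both sides (made up)
    Jf = 1.5                          # face Jacobian at this node (made up)
    flux_face_value = (cons_flux_at_face_node + 0.5 * noncons_flux_at_face_node) * Jf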
+ for f in mesh.boundary_faces[boundary_key] + for i in Base.OneTo(num_pts_per_face) + face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i, f] + face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) + + # Compute conservative and non-conservative fluxes separately. + # This imposes boundary conditions on the conservative part of the flux. + cons_flux_at_face_node = boundary_condition(u_face_values[i, f], + face_normal, face_coordinates, + t, + surface_flux, equations) + + # Compute pointwise nonconservative numerical flux at the boundary. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, there is only one `face_normal` at boundaries, which we pass in twice. + # Note: This does not set any type of boundary condition for the nonconservative term + noncons_flux_at_face_node = nonconservative_flux(u_face_values[i, f], + u_face_values[i, f], + face_normal, face_normal, + equations) + + flux_face_values[i, f] = (cons_flux_at_face_node + + 0.5 * noncons_flux_at_face_node) * Jf[i, f] + end end - end - # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. - # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. + # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. + # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. end - # inverts Jacobian and scales by -1.0 -function invert_jacobian!(du, mesh::DGMultiMesh, equations, dg::DGMulti, cache; scaling=-1) - @threaded for e in eachelement(mesh, dg, cache) - invJ = cache.invJ[1, e] - for i in axes(du, 1) - du[i, e] *= scaling * invJ +function invert_jacobian!(du, mesh::DGMultiMesh, equations, dg::DGMulti, cache; + scaling = -1) + @threaded for e in eachelement(mesh, dg, cache) + invJ = cache.invJ[1, e] + for i in axes(du, 1) + du[i, e] *= scaling * invJ + end end - end end # inverts Jacobian using weight-adjusted DG, and scales by -1.0. @@ -557,87 +592,92 @@ end # "Weight-adjusted discontinuous Galerkin methods: curvilinear meshes." 
# https://doi.org/10.1137/16M1089198 function invert_jacobian!(du, mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations, - dg::DGMulti, cache; scaling=-1) where {NDIMS} - # Vq = interpolation matrix to quadrature points, Pq = quadrature-based L2 projection matrix - (; Pq, Vq) = dg.basis - (; local_values_threaded, invJ) = cache + dg::DGMulti, cache; scaling = -1) where {NDIMS} + # Vq = interpolation matrix to quadrature points, Pq = quadrature-based L2 projection matrix + (; Pq, Vq) = dg.basis + (; local_values_threaded, invJ) = cache - @threaded for e in eachelement(mesh, dg, cache) - du_at_quad_points = local_values_threaded[Threads.threadid()] + @threaded for e in eachelement(mesh, dg, cache) + du_at_quad_points = local_values_threaded[Threads.threadid()] - # interpolate solution to quadrature - apply_to_each_field(mul_by!(Vq), du_at_quad_points, view(du, :, e)) + # interpolate solution to quadrature + apply_to_each_field(mul_by!(Vq), du_at_quad_points, view(du, :, e)) - # scale by quadrature points - for i in eachindex(du_at_quad_points) - du_at_quad_points[i] *= scaling * invJ[i, e] - end + # scale by the inverse Jacobian at quadrature points + for i in eachindex(du_at_quad_points) + du_at_quad_points[i] *= scaling * invJ[i, e] + end - # project back to polynomials - apply_to_each_field(mul_by!(Pq), view(du, :, e), du_at_quad_points) - end + # project back to polynomials + apply_to_each_field(mul_by!(Pq), view(du, :, e), du_at_quad_points) + end end # Multiple calc_sources! to resolve method ambiguities -calc_sources!(du, u, t, source_terms::Nothing, - mesh, equations, dg::DGMulti, cache) = nothing -calc_sources!(du, u, t, source_terms::Nothing, - mesh, equations, dg::DGMultiFluxDiffSBP, cache) = nothing +function calc_sources!(du, u, t, source_terms::Nothing, + mesh, equations, dg::DGMulti, cache) + nothing +end +function calc_sources!(du, u, t, source_terms::Nothing, + mesh, equations, dg::DGMultiFluxDiffSBP, cache) + nothing +end # uses quadrature + projection to compute source terms.
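Editor's note: a stand-alone sketch of the weight-adjusted inversion in `invert_jacobian!` above (interpolate to quadrature, scale by `scaling / J` there, project back). The degree-1 operators and the `invJ` values are illustrative stand-ins, not actual `RefElemData` or mesh data:

    using LinearAlgebra

    Vq = [1.0 -1 / sqrt(3); 1.0 1 / sqrt(3)]   # interpolation to 2 quadrature points
    M  = Vq' * Diagonal([1.0, 1.0]) * Vq       # quadrature-based mass matrix
    Pq = M \ (Vq' * Diagonal([1.0, 1.0]))      # quadrature-based L2 projection
    invJ = [0.8, 1.2]                          # 1/J at the quadrature points (made up)
    du = [1.0, 2.0]                            # residual on one element (made up)
    du .= Pq * (-1 .* invJ .* (Vq * du))       # weight-adjusted inversion, scaling = -1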
function calc_sources!(du, u, t, source_terms, mesh, equations, dg::DGMulti, cache) - - rd = dg.basis - md = mesh.md - @unpack Pq = rd - @unpack u_values, local_values_threaded = cache - @threaded for e in eachelement(mesh, dg, cache) - - source_values = local_values_threaded[Threads.threadid()] - - u_e = view(u_values, :, e) # u_values should already be computed from volume integral - - for i in each_quad_node(mesh, dg, cache) - source_values[i] = source_terms(u_e[i], SVector(getindex.(md.xyzq, i, e)), - t, equations) + rd = dg.basis + md = mesh.md + @unpack Pq = rd + @unpack u_values, local_values_threaded = cache + @threaded for e in eachelement(mesh, dg, cache) + source_values = local_values_threaded[Threads.threadid()] + + u_e = view(u_values, :, e) # u_values should already be computed from volume integral + + for i in each_quad_node(mesh, dg, cache) + source_values[i] = source_terms(u_e[i], SVector(getindex.(md.xyzq, i, e)), + t, equations) + end + apply_to_each_field(mul_by_accum!(Pq), view(du, :, e), source_values) end - apply_to_each_field(mul_by_accum!(Pq), view(du, :, e), source_values) - end end function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, source_terms::Source, dg::DGMulti, cache) where {BC, Source} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, dg.surface_integral, dg) + end - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, dg.surface_integral, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, dg.surface_integral, dg, cache) + end - @trixi_timeit timer() "Jacobian" invert_jacobian!( - du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, mesh, equations, dg, cache) + end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgmulti/dg_parabolic.jl b/src/solvers/dgmulti/dg_parabolic.jl index c4015dbca04..72dbe2c4256 100644 --- a/src/solvers/dgmulti/dg_parabolic.jl +++ 
b/src/solvers/dgmulti/dg_parabolic.jl @@ -4,387 +4,417 @@ function create_cache_parabolic(mesh::DGMultiMesh, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti, parabolic_scheme, RealT, uEltype) - # default to taking derivatives of all hyperbolic variables - # TODO: parabolic; utilize the parabolic variables in `equations_parabolic` to reduce memory usage in the parabolic cache - nvars = nvariables(equations_hyperbolic) - - (; M, Vq, Pq, Drst) = dg.basis - - # gradient operators: map from nodes to quadrature - strong_differentiation_matrices = map(A -> Vq * A, Drst) - gradient_lift_matrix = Vq * dg.basis.LIFT - - # divergence operators: map from quadrature to nodes - weak_differentiation_matrices = map(A -> (M \ (-A' * M * Pq)), Drst) - divergence_lift_matrix = dg.basis.LIFT - projection_face_interpolation_matrix = dg.basis.Vf * dg.basis.Pq - - # evaluate geometric terms at quadrature points in case the mesh is curved - (; md) = mesh - J = dg.basis.Vq * md.J - invJ = inv.(J) - dxidxhatj = map(x -> dg.basis.Vq * x, md.rstxyzJ) - - # u_transformed stores "transformed" variables for computing the gradient - u_transformed = allocate_nested_array(uEltype, nvars, size(md.x), dg) - gradients = SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, (dg.basis.Nq, mesh.md.num_elements)), ndims(mesh))) - flux_viscous = similar.(gradients) - - u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - scalar_flux_face_values = similar(u_face_values) - gradients_face_values = ntuple(_ -> similar(u_face_values), ndims(mesh)) - - local_u_values_threaded = [similar(u_transformed, dg.basis.Nq) for _ in 1:Threads.nthreads()] - local_flux_viscous_threaded = [SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, dg.basis.Nq), ndims(mesh))) for _ in 1:Threads.nthreads()] - local_flux_face_values_threaded = [similar(scalar_flux_face_values[:, 1]) for _ in 1:Threads.nthreads()] - - return (; u_transformed, gradients, flux_viscous, + # default to taking derivatives of all hyperbolic variables + # TODO: parabolic; utilize the parabolic variables in `equations_parabolic` to reduce memory usage in the parabolic cache + nvars = nvariables(equations_hyperbolic) + + (; M, Vq, Pq, Drst) = dg.basis + + # gradient operators: map from nodes to quadrature + strong_differentiation_matrices = map(A -> Vq * A, Drst) + gradient_lift_matrix = Vq * dg.basis.LIFT + + # divergence operators: map from quadrature to nodes + weak_differentiation_matrices = map(A -> (M \ (-A' * M * Pq)), Drst) + divergence_lift_matrix = dg.basis.LIFT + projection_face_interpolation_matrix = dg.basis.Vf * dg.basis.Pq + + # evaluate geometric terms at quadrature points in case the mesh is curved + (; md) = mesh + J = dg.basis.Vq * md.J + invJ = inv.(J) + dxidxhatj = map(x -> dg.basis.Vq * x, md.rstxyzJ) + + # u_transformed stores "transformed" variables for computing the gradient + u_transformed = allocate_nested_array(uEltype, nvars, size(md.x), dg) + gradients = SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, + (dg.basis.Nq, + mesh.md.num_elements)), + ndims(mesh))) + flux_viscous = similar.(gradients) + + u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + scalar_flux_face_values = similar(u_face_values) + gradients_face_values = ntuple(_ -> similar(u_face_values), ndims(mesh)) + + local_u_values_threaded = [similar(u_transformed, dg.basis.Nq) + for _ in 1:Threads.nthreads()] + local_flux_viscous_threaded = [SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, + dg.basis.Nq), + ndims(mesh))) + 
for _ in 1:Threads.nthreads()] + local_flux_face_values_threaded = [similar(scalar_flux_face_values[:, 1]) + for _ in 1:Threads.nthreads()] + + return (; u_transformed, gradients, flux_viscous, weak_differentiation_matrices, strong_differentiation_matrices, - gradient_lift_matrix, projection_face_interpolation_matrix, divergence_lift_matrix, + gradient_lift_matrix, projection_face_interpolation_matrix, + divergence_lift_matrix, dxidxhatj, J, invJ, # geometric terms u_face_values, gradients_face_values, scalar_flux_face_values, - local_u_values_threaded, local_flux_viscous_threaded, local_flux_face_values_threaded) + local_u_values_threaded, local_flux_viscous_threaded, + local_flux_face_values_threaded) end # Transform solution variables prior to taking the gradient # (e.g., conservative to primitive variables). Defaults to doing nothing. # TODO: can we avoid copying data? -function transform_variables!(u_transformed, u, mesh, equations_parabolic::AbstractEquationsParabolic, +function transform_variables!(u_transformed, u, mesh, + equations_parabolic::AbstractEquationsParabolic, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) - @threaded for i in eachindex(u) - u_transformed[i] = gradient_variable_transformation(equations_parabolic)(u[i], equations_parabolic) - end + @threaded for i in eachindex(u) + u_transformed[i] = gradient_variable_transformation(equations_parabolic)(u[i], + equations_parabolic) + end end # TODO: reuse entropy projection computations for DGMultiFluxDiff{<:Polynomial} (including `GaussSBP` solvers) function calc_gradient_surface_integral!(gradients, u, scalar_flux_face_values, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) - (; gradient_lift_matrix, local_flux_face_values_threaded) = cache_parabolic - @threaded for e in eachelement(mesh, dg) - local_flux_values = local_flux_face_values_threaded[Threads.threadid()] - for dim in eachdim(mesh) - for i in eachindex(local_flux_values) - # compute flux * (nx, ny, nz) - local_flux_values[i] = scalar_flux_face_values[i, e] * mesh.md.nxyzJ[dim][i, e] - end - apply_to_each_field(mul_by_accum!(gradient_lift_matrix), view(gradients[dim], :, e), local_flux_values) + (; gradient_lift_matrix, local_flux_face_values_threaded) = cache_parabolic + @threaded for e in eachelement(mesh, dg) + local_flux_values = local_flux_face_values_threaded[Threads.threadid()] + for dim in eachdim(mesh) + for i in eachindex(local_flux_values) + # compute flux * (nx, ny, nz) + local_flux_values[i] = scalar_flux_face_values[i, e] * + mesh.md.nxyzJ[dim][i, e] + end + apply_to_each_field(mul_by_accum!(gradient_lift_matrix), + view(gradients[dim], :, e), local_flux_values) + end end - end end function calc_gradient_volume_integral!(gradients, u, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) + (; strong_differentiation_matrices) = cache_parabolic - (; strong_differentiation_matrices) = cache_parabolic - - # compute volume contributions to gradients - @threaded for e in eachelement(mesh, dg) - for i in eachdim(mesh), j in eachdim(mesh) + # compute volume contributions to gradients + @threaded for e in eachelement(mesh, dg) + for i in eachdim(mesh), j in eachdim(mesh) - # We assume each element is affine (e.g., constant geometric terms) here. - dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] + # We assume each element is affine (e.g., constant geometric terms) here. 
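Editor's note: the affine assumption mentioned above lets the kernel read a single geometric term per element while applying the chain rule du/dx_i = sum_j (dxi_j/dx_i) * (D_j u). A 1D toy version (operators and element size are illustrative; the actual kernels also carry the Jacobian, which `invert_jacobian_gradient!` removes later):

    D_r   = [-0.5 0.5; -0.5 0.5]   # reference derivative matrix for a linear basis on [-1, 1]
    dr_dx = 2.0                    # constant geometric term on an affine element of length 1
    u_e   = [0.0, 1.0]             # nodal values on one element (made up)
    dudx  = dr_dx .* (D_r * u_e)   # physical-space derivative, here [1.0, 1.0]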
+ dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] - apply_to_each_field(mul_by_accum!(strong_differentiation_matrices[j], dxidxhatj), - view(gradients[i], :, e), view(u, :, e)) + apply_to_each_field(mul_by_accum!(strong_differentiation_matrices[j], + dxidxhatj), + view(gradients[i], :, e), view(u, :, e)) + end end - end end function calc_gradient_volume_integral!(gradients, u, mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) where {NDIMS} + (; strong_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic - (; strong_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic + # compute volume contributions to gradients + @threaded for e in eachelement(mesh, dg) - # compute volume contributions to gradients - @threaded for e in eachelement(mesh, dg) - - # compute gradients with respect to reference coordinates - local_reference_gradients = local_flux_viscous_threaded[Threads.threadid()] - for i in eachdim(mesh) - apply_to_each_field(mul_by!(strong_differentiation_matrices[i]), - local_reference_gradients[i], view(u, :, e)) - end + # compute gradients with respect to reference coordinates + local_reference_gradients = local_flux_viscous_threaded[Threads.threadid()] + for i in eachdim(mesh) + apply_to_each_field(mul_by!(strong_differentiation_matrices[i]), + local_reference_gradients[i], view(u, :, e)) + end - # rotate to physical frame on each element - for i in eachdim(mesh), j in eachdim(mesh) - for node in eachindex(local_reference_gradients[j]) - gradients[i][node, e] = gradients[i][node, e] + dxidxhatj[i, j][node, e] * local_reference_gradients[j][node] - end + # rotate to physical frame on each element + for i in eachdim(mesh), j in eachdim(mesh) + for node in eachindex(local_reference_gradients[j]) + gradients[i][node, e] = gradients[i][node, e] + + dxidxhatj[i, j][node, e] * + local_reference_gradients[j][node] + end + end end - end end function calc_gradient!(gradients, u::StructArray, t, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, boundary_conditions, dg::DGMulti, cache, cache_parabolic) + for dim in eachindex(gradients) + reset_du!(gradients[dim], dg) + end - for dim in eachindex(gradients) - reset_du!(gradients[dim], dg) - end - - calc_gradient_volume_integral!(gradients, u, mesh, equations, dg, cache, cache_parabolic) - - (; u_face_values) = cache_parabolic - apply_to_each_field(mul_by!(dg.basis.Vf), u_face_values, u) - - # compute fluxes at interfaces - (; scalar_flux_face_values) = cache_parabolic - (; mapM, mapP) = mesh.md - @threaded for face_node_index in each_face_node_global(mesh, dg) - idM, idP = mapM[face_node_index], mapP[face_node_index] - uM = u_face_values[idM] - uP = u_face_values[idP] - # Here, we use the "strong" formulation to compute the gradient. This guarantees that the parabolic - # formulation is symmetric and stable on curved meshes with variable geometric terms. 
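Editor's note: these kernels all reuse one threading pattern, namely a preallocated scratch array per thread fetched via `Threads.threadid()` inside the element loop (`local_u_values_threaded`, `local_flux_viscous_threaded`, ...). A minimal sketch of that pattern, with `Threads.@threads :static` standing in for Trixi.jl's `@threaded` (static scheduling keeps `threadid()` stable within each iteration):

    using Base.Threads

    local_values_threaded = [zeros(8) for _ in 1:nthreads()]
    Threads.@threads :static for e in 1:64
        scratch = local_values_threaded[threadid()]
        fill!(scratch, 0.0)   # reset before use; safe because each thread owns its copy
        scratch .+= e         # stand-in for per-element work
    end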
- scalar_flux_face_values[idM] = 0.5 * (uP - uM) - end - - calc_boundary_flux!(scalar_flux_face_values, u_face_values, t, Gradient(), boundary_conditions, - mesh, equations, dg, cache, cache_parabolic) + calc_gradient_volume_integral!(gradients, u, mesh, equations, dg, cache, + cache_parabolic) + + (; u_face_values) = cache_parabolic + apply_to_each_field(mul_by!(dg.basis.Vf), u_face_values, u) + + # compute fluxes at interfaces + (; scalar_flux_face_values) = cache_parabolic + (; mapM, mapP) = mesh.md + @threaded for face_node_index in each_face_node_global(mesh, dg) + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM = u_face_values[idM] + uP = u_face_values[idP] + # Here, we use the "strong" formulation to compute the gradient. This guarantees that the parabolic + # formulation is symmetric and stable on curved meshes with variable geometric terms. + scalar_flux_face_values[idM] = 0.5 * (uP - uM) + end - # compute surface contributions - calc_gradient_surface_integral!(gradients, u, scalar_flux_face_values, - mesh, equations, dg, cache, cache_parabolic) + calc_boundary_flux!(scalar_flux_face_values, u_face_values, t, Gradient(), + boundary_conditions, + mesh, equations, dg, cache, cache_parabolic) - invert_jacobian_gradient!(gradients, mesh, equations, dg, cache, cache_parabolic) + # compute surface contributions + calc_gradient_surface_integral!(gradients, u, scalar_flux_face_values, + mesh, equations, dg, cache, cache_parabolic) + invert_jacobian_gradient!(gradients, mesh, equations, dg, cache, cache_parabolic) end # affine mesh - constant Jacobian version function invert_jacobian_gradient!(gradients, mesh::DGMultiMesh, equations, dg::DGMulti, cache, cache_parabolic) - @threaded for e in eachelement(mesh, dg) + @threaded for e in eachelement(mesh, dg) - # Here, we exploit the fact that J is constant on affine elements, - # so we only have to access invJ once per element. - invJ = cache_parabolic.invJ[1, e] + # Here, we exploit the fact that J is constant on affine elements, + # so we only have to access invJ once per element. 
+ invJ = cache_parabolic.invJ[1, e] - for dim in eachdim(mesh) - for i in axes(gradients[dim], 1) - gradients[dim][i, e] = gradients[dim][i, e] * invJ - end + for dim in eachdim(mesh) + for i in axes(gradients[dim], 1) + gradients[dim][i, e] = gradients[dim][i, e] * invJ + end + end end - end end # non-affine mesh - variable Jacobian version -function invert_jacobian_gradient!(gradients, mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations, +function invert_jacobian_gradient!(gradients, mesh::DGMultiMesh{NDIMS, <:NonAffine}, + equations, dg::DGMulti, cache, cache_parabolic) where {NDIMS} - (; invJ) = cache_parabolic - @threaded for e in eachelement(mesh, dg) - for dim in eachdim(mesh) - for i in axes(gradients[dim], 1) - gradients[dim][i, e] = gradients[dim][i, e] * invJ[i, e] - end + (; invJ) = cache_parabolic + @threaded for e in eachelement(mesh, dg) + for dim in eachdim(mesh) + for i in axes(gradients[dim], 1) + gradients[dim][i, e] = gradients[dim][i, e] * invJ[i, e] + end + end end - end end # do nothing for periodic domains function calc_boundary_flux!(flux, u, t, operator_type, ::BoundaryConditionPeriodic, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) - return nothing + return nothing end # "lispy tuple programming" instead of for loop for type stability function calc_boundary_flux!(flux, u, t, operator_type, boundary_conditions, mesh, equations, dg::DGMulti, cache, cache_parabolic) - # peel off first boundary condition - calc_single_boundary_flux!(flux, u, t, operator_type, first(boundary_conditions), first(keys(boundary_conditions)), - mesh, equations, dg, cache, cache_parabolic) + # peel off first boundary condition + calc_single_boundary_flux!(flux, u, t, operator_type, first(boundary_conditions), + first(keys(boundary_conditions)), + mesh, equations, dg, cache, cache_parabolic) - # recurse on the remainder of the boundary conditions - calc_boundary_flux!(flux, u, t, operator_type, Base.tail(boundary_conditions), - mesh, equations, dg, cache, cache_parabolic) + # recurse on the remainder of the boundary conditions + calc_boundary_flux!(flux, u, t, operator_type, Base.tail(boundary_conditions), + mesh, equations, dg, cache, cache_parabolic) end # terminate recursion -calc_boundary_flux!(flux, u, t, operator_type, boundary_conditions::NamedTuple{(),Tuple{}}, - mesh, equations, dg::DGMulti, cache, cache_parabolic) = nothing +function calc_boundary_flux!(flux, u, t, operator_type, + boundary_conditions::NamedTuple{(), Tuple{}}, + mesh, equations, dg::DGMulti, cache, cache_parabolic) + nothing +end function calc_single_boundary_flux!(flux_face_values, u_face_values, t, operator_type, boundary_condition, boundary_key, - mesh, equations, dg::DGMulti{NDIMS}, cache, cache_parabolic) where {NDIMS} - rd = dg.basis - md = mesh.md - - num_faces = StartUpDG.num_faces(rd.element_type) - num_pts_per_face = rd.Nfq ÷ num_faces - (; xyzf, nxyz) = md - for f in mesh.boundary_faces[boundary_key] - for i in Base.OneTo(num_pts_per_face) - - # reverse engineer element + face node indices (avoids reshaping arrays) - e = ((f-1) ÷ num_faces) + 1 - fid = i + ((f-1) % num_faces) * num_pts_per_face - - face_normal = SVector{NDIMS}(getindex.(nxyz, fid, e)) - face_coordinates = SVector{NDIMS}(getindex.(xyzf, fid, e)) - - # for both the gradient and the divergence, the boundary flux is scalar valued. - # for the gradient, it is the solution; for divergence, it is the normal flux. 
- flux_face_values[fid,e] = boundary_condition(flux_face_values[fid,e], u_face_values[fid,e], - face_normal, face_coordinates, t, - operator_type, equations) - - # Here, we use the "strong form" for the Gradient (and the "weak form" for Divergence). - # `flux_face_values` should contain the boundary values for `u`, and we - # subtract off `u_face_values[fid, e]` because we are using the strong formulation to - # compute the gradient. - if operator_type isa Gradient - flux_face_values[fid, e] = flux_face_values[fid, e] - u_face_values[fid, e] - end - + mesh, equations, dg::DGMulti{NDIMS}, cache, + cache_parabolic) where {NDIMS} + rd = dg.basis + md = mesh.md + + num_faces = StartUpDG.num_faces(rd.element_type) + num_pts_per_face = rd.Nfq ÷ num_faces + (; xyzf, nxyz) = md + for f in mesh.boundary_faces[boundary_key] + for i in Base.OneTo(num_pts_per_face) + + # reverse engineer element + face node indices (avoids reshaping arrays) + e = ((f - 1) ÷ num_faces) + 1 + fid = i + ((f - 1) % num_faces) * num_pts_per_face + + face_normal = SVector{NDIMS}(getindex.(nxyz, fid, e)) + face_coordinates = SVector{NDIMS}(getindex.(xyzf, fid, e)) + + # for both the gradient and the divergence, the boundary flux is scalar valued. + # for the gradient, it is the solution; for divergence, it is the normal flux. + flux_face_values[fid, e] = boundary_condition(flux_face_values[fid, e], + u_face_values[fid, e], + face_normal, face_coordinates, t, + operator_type, equations) + + # Here, we use the "strong form" for the Gradient (and the "weak form" for Divergence). + # `flux_face_values` should contain the boundary values for `u`, and we + # subtract off `u_face_values[fid, e]` because we are using the strong formulation to + # compute the gradient. + if operator_type isa Gradient + flux_face_values[fid, e] = flux_face_values[fid, e] - u_face_values[fid, e] + end + end end - end - return nothing + return nothing end function calc_viscous_fluxes!(flux_viscous, u, gradients, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) + for dim in eachdim(mesh) + reset_du!(flux_viscous[dim], dg) + end - for dim in eachdim(mesh) - reset_du!(flux_viscous[dim], dg) - end - - (; local_u_values_threaded) = cache_parabolic + (; local_u_values_threaded) = cache_parabolic - @threaded for e in eachelement(mesh, dg) + @threaded for e in eachelement(mesh, dg) - # reset local storage for each element, interpolate u to quadrature points - # TODO: DGMulti. Specialize for nodal collocation methods (SBP, GaussSBP)? - local_u_values = local_u_values_threaded[Threads.threadid()] - fill!(local_u_values, zero(eltype(local_u_values))) - apply_to_each_field(mul_by!(dg.basis.Vq), local_u_values, view(u, :, e)) + # reset local storage for each element, interpolate u to quadrature points + # TODO: DGMulti. Specialize for nodal collocation methods (SBP, GaussSBP)? 
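Editor's note: the index gymnastics in `calc_single_boundary_flux!` above avoid reshaping by recovering the element and the within-element face-node index from a global face index. With illustrative sizes:

    num_faces, num_pts_per_face = 4, 3   # e.g., quadrilaterals with 3 face quadrature points
    f, i = 6, 2                          # global face index, point index on that face (made up)
    e   = ((f - 1) ÷ num_faces) + 1                      # -> element 2
    fid = i + ((f - 1) % num_faces) * num_pts_per_face   # -> face node 5 within element 2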
+ local_u_values = local_u_values_threaded[Threads.threadid()] + fill!(local_u_values, zero(eltype(local_u_values))) + apply_to_each_field(mul_by!(dg.basis.Vq), local_u_values, view(u, :, e)) - # compute viscous flux at quad points - for i in eachindex(local_u_values) - u_i = local_u_values[i] - gradients_i = getindex.(gradients, i, e) - for dim in eachdim(mesh) - flux_viscous_i = flux(u_i, gradients_i, dim, equations) - setindex!(flux_viscous[dim], flux_viscous_i, i, e) - end + # compute viscous flux at quad points + for i in eachindex(local_u_values) + u_i = local_u_values[i] + gradients_i = getindex.(gradients, i, e) + for dim in eachdim(mesh) + flux_viscous_i = flux(u_i, gradients_i, dim, equations) + setindex!(flux_viscous[dim], flux_viscous_i, i, e) + end + end end - - end end # no penalization for a BR1 parabolic solver -function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, boundary_conditions, +function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, + boundary_conditions, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, - parabolic_scheme::ViscousFormulationBassiRebay1, cache, cache_parabolic) - return nothing + parabolic_scheme::ViscousFormulationBassiRebay1, cache, + cache_parabolic) + return nothing end -function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, boundary_conditions, +function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, + boundary_conditions, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) - # compute fluxes at interfaces - (; scalar_flux_face_values) = cache_parabolic - (; mapM, mapP) = mesh.md - @threaded for face_node_index in each_face_node_global(mesh, dg) - idM, idP = mapM[face_node_index], mapP[face_node_index] - uM, uP = u_face_values[idM], u_face_values[idP] - scalar_flux_face_values[idM] = scalar_flux_face_values[idM] + penalty(uP, uM, equations, parabolic_scheme) - end - return nothing + # compute fluxes at interfaces + (; scalar_flux_face_values) = cache_parabolic + (; mapM, mapP) = mesh.md + @threaded for face_node_index in each_face_node_global(mesh, dg) + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM, uP = u_face_values[idM], u_face_values[idP] + scalar_flux_face_values[idM] = scalar_flux_face_values[idM] + + penalty(uP, uM, equations, parabolic_scheme) + end + return nothing end function calc_divergence_volume_integral!(du, u, flux_viscous, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) - (; weak_differentiation_matrices) = cache_parabolic - - # compute volume contributions to divergence - @threaded for e in eachelement(mesh, dg) - for i in eachdim(mesh), j in eachdim(mesh) - dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] # assumes mesh is affine - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], dxidxhatj), - view(du, :, e), view(flux_viscous[i], :, e)) + (; weak_differentiation_matrices) = cache_parabolic + + # compute volume contributions to divergence + @threaded for e in eachelement(mesh, dg) + for i in eachdim(mesh), j in eachdim(mesh) + dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] # assumes mesh is affine + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], dxidxhatj), + view(du, :, e), view(flux_viscous[i], :, e)) + end end - end end -function calc_divergence_volume_integral!(du, u, flux_viscous, mesh::DGMultiMesh{NDIMS, <:NonAffine}, +function calc_divergence_volume_integral!(du, u, flux_viscous, + 
mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) where {NDIMS} - (; weak_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic - - # compute volume contributions to divergence - @threaded for e in eachelement(mesh, dg) - - local_viscous_flux = local_flux_viscous_threaded[Threads.threadid()][1] - for i in eachdim(mesh) - # rotate flux to reference coordinates - fill!(local_viscous_flux, zero(eltype(local_viscous_flux))) - for j in eachdim(mesh) - for node in eachindex(local_viscous_flux) - local_viscous_flux[node] = local_viscous_flux[node] + dxidxhatj[j, i][node, e] * flux_viscous[j][node, e] + (; weak_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic + + # compute volume contributions to divergence + @threaded for e in eachelement(mesh, dg) + local_viscous_flux = local_flux_viscous_threaded[Threads.threadid()][1] + for i in eachdim(mesh) + # rotate flux to reference coordinates + fill!(local_viscous_flux, zero(eltype(local_viscous_flux))) + for j in eachdim(mesh) + for node in eachindex(local_viscous_flux) + local_viscous_flux[node] = local_viscous_flux[node] + + dxidxhatj[j, i][node, e] * + flux_viscous[j][node, e] + end + end + + # differentiate with respect to reference coordinates + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[i]), + view(du, :, e), local_viscous_flux) end - end - - # differentiate with respect to reference coordinates - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[i]), - view(du, :, e), local_viscous_flux) end - end end function calc_divergence!(du, u::StructArray, t, flux_viscous, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, - boundary_conditions, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) + boundary_conditions, dg::DGMulti, parabolic_scheme, cache, + cache_parabolic) + reset_du!(du, dg) - reset_du!(du, dg) + calc_divergence_volume_integral!(du, u, flux_viscous, mesh, equations, dg, cache, + cache_parabolic) - calc_divergence_volume_integral!(du, u, flux_viscous, mesh, equations, dg, cache, cache_parabolic) - - # interpolates from solution coefficients to face quadrature points - (; projection_face_interpolation_matrix) = cache_parabolic - flux_viscous_face_values = cache_parabolic.gradients_face_values # reuse storage - for dim in eachdim(mesh) - apply_to_each_field(mul_by!(projection_face_interpolation_matrix), flux_viscous_face_values[dim], flux_viscous[dim]) - end - - # compute fluxes at interfaces - (; scalar_flux_face_values) = cache_parabolic - (; mapM, mapP, nxyzJ) = mesh.md - - @threaded for face_node_index in each_face_node_global(mesh, dg, cache, cache_parabolic) - idM, idP = mapM[face_node_index], mapP[face_node_index] - - # compute f(u, ∇u) ⋅ n - flux_face_value = zero(eltype(scalar_flux_face_values)) + # interpolates from solution coefficients to face quadrature points + (; projection_face_interpolation_matrix) = cache_parabolic + flux_viscous_face_values = cache_parabolic.gradients_face_values # reuse storage for dim in eachdim(mesh) - fM = flux_viscous_face_values[dim][idM] - fP = flux_viscous_face_values[dim][idP] - # Here, we use the "weak" formulation to compute the divergence (to ensure stability on curved meshes). 
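Editor's note: two different interface terms appear in this file. The gradient uses the strong-form jump, while the divergence uses the weak-form average of the viscous fluxes, as the comment above notes. Side by side, with toy scalar values (all made up):

    uM, uP = 1.0, 3.0                       # inner/outer solution traces
    fM, fP = 0.2, 0.4                       # inner/outer viscous flux traces
    nJ = 1.0                                # scaled normal component
    gradient_flux   = 0.5 * (uP - uM)       # strong form, used in calc_gradient!
    divergence_flux = 0.5 * (fP + fM) * nJ  # weak form, used in calc_divergence!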
- flux_face_value = flux_face_value + 0.5 * (fP + fM) * nxyzJ[dim][face_node_index] + apply_to_each_field(mul_by!(projection_face_interpolation_matrix), + flux_viscous_face_values[dim], flux_viscous[dim]) + end + + # compute fluxes at interfaces + (; scalar_flux_face_values) = cache_parabolic + (; mapM, mapP, nxyzJ) = mesh.md + + @threaded for face_node_index in each_face_node_global(mesh, dg, cache, cache_parabolic) + idM, idP = mapM[face_node_index], mapP[face_node_index] + + # compute f(u, ∇u) ⋅ n + flux_face_value = zero(eltype(scalar_flux_face_values)) + for dim in eachdim(mesh) + fM = flux_viscous_face_values[dim][idM] + fP = flux_viscous_face_values[dim][idP] + # Here, we use the "weak" formulation to compute the divergence (to ensure stability on curved meshes). + flux_face_value = flux_face_value + + 0.5 * (fP + fM) * nxyzJ[dim][face_node_index] + end + scalar_flux_face_values[idM] = flux_face_value end - scalar_flux_face_values[idM] = flux_face_value - end - calc_boundary_flux!(scalar_flux_face_values, cache_parabolic.u_face_values, t, Divergence(), - boundary_conditions, mesh, equations, dg, cache, cache_parabolic) + calc_boundary_flux!(scalar_flux_face_values, cache_parabolic.u_face_values, t, + Divergence(), + boundary_conditions, mesh, equations, dg, cache, cache_parabolic) - calc_viscous_penalty!(scalar_flux_face_values, cache_parabolic.u_face_values, t, - boundary_conditions, mesh, equations, dg, parabolic_scheme, - cache, cache_parabolic) + calc_viscous_penalty!(scalar_flux_face_values, cache_parabolic.u_face_values, t, + boundary_conditions, mesh, equations, dg, parabolic_scheme, + cache, cache_parabolic) - # surface contributions - apply_to_each_field(mul_by_accum!(cache_parabolic.divergence_lift_matrix), du, scalar_flux_face_values) + # surface contributions + apply_to_each_field(mul_by_accum!(cache_parabolic.divergence_lift_matrix), du, + scalar_flux_face_values) - # Note: we do not flip the sign of the geometric Jacobian here. - # This is because the parabolic fluxes are assumed to be of the form - # `du/dt + df/dx = dg/dx + source(x,t)`, - # where f(u) is the inviscid flux and g(u) is the viscous flux. - invert_jacobian!(du, mesh, equations, dg, cache; scaling=1.0) + # Note: we do not flip the sign of the geometric Jacobian here. + # This is because the parabolic fluxes are assumed to be of the form + # `du/dt + df/dx = dg/dx + source(x,t)`, + # where f(u) is the inviscid flux and g(u) is the viscous flux. + invert_jacobian!(du, mesh, equations, dg, cache; scaling = 1.0) end # assumptions: parabolic terms are of the form div(f(u, grad(u))) and @@ -393,32 +423,31 @@ end # 2. compute f(u, grad(u)) # 3. compute div(u) # boundary conditions will be applied to both grad(u) and div(u). 
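Editor's note: the `rhs_parabolic!` driver below chains exactly these three steps. As a hedged 1D finite-difference analogue (a periodic grid and central differences stand in for the DG operators; `mu` is a made-up viscosity):

    n = 16; dx = 1 / n
    x = (0:(n - 1)) ./ n
    u = sin.(2pi .* x)
    ddx(v) = (circshift(v, -1) .- circshift(v, 1)) ./ (2 * dx)

    mu = 0.01
    gradients    = ddx(u)             # 1. compute grad(u)
    flux_viscous = mu .* gradients    # 2. compute f(u, grad(u))
    du           = ddx(flux_viscous)  # 3. compute div(f); note: no sign flip here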
-function rhs_parabolic!(du, u, t, mesh::DGMultiMesh, equations_parabolic::AbstractEquationsParabolic, +function rhs_parabolic!(du, u, t, mesh::DGMultiMesh, + equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions, source_terms, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) + reset_du!(du, dg) - reset_du!(du, dg) - - @trixi_timeit timer() "transform variables" begin - (; u_transformed, gradients, flux_viscous) = cache_parabolic - transform_variables!(u_transformed, u, mesh, equations_parabolic, - dg, parabolic_scheme, cache, cache_parabolic) - end - - @trixi_timeit timer() "calc gradient" begin - calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, - boundary_conditions, dg, cache, cache_parabolic) - end + @trixi_timeit timer() "transform variables" begin + (; u_transformed, gradients, flux_viscous) = cache_parabolic + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, parabolic_scheme, cache, cache_parabolic) + end - @trixi_timeit timer() "calc viscous fluxes" begin - calc_viscous_fluxes!(flux_viscous, u_transformed, gradients, - mesh, equations_parabolic, dg, cache, cache_parabolic) - end + @trixi_timeit timer() "calc gradient" begin + calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, + boundary_conditions, dg, cache, cache_parabolic) + end - @trixi_timeit timer() "calc divergence" begin - calc_divergence!(du, u_transformed, t, flux_viscous, mesh, equations_parabolic, - boundary_conditions, dg, parabolic_scheme, cache, cache_parabolic) - end - return nothing + @trixi_timeit timer() "calc viscous fluxes" begin + calc_viscous_fluxes!(flux_viscous, u_transformed, gradients, + mesh, equations_parabolic, dg, cache, cache_parabolic) + end + @trixi_timeit timer() "calc divergence" begin + calc_divergence!(du, u_transformed, t, flux_viscous, mesh, equations_parabolic, + boundary_conditions, dg, parabolic_scheme, cache, cache_parabolic) + end + return nothing end diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 1031c837efa..884a8fac43b 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # hadamard_sum!(du, A, # flux_is_symmetric, volume_flux, @@ -24,158 +25,168 @@ @inline function hadamard_sum!(du, A, flux_is_symmetric::True, volume_flux, orientation_or_normal_direction, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - AF_ij = 2 * A[i,j] * volume_flux(u_i, u_j, orientation_or_normal_direction, equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end + row_ids, col_ids = axes(A) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for j in col_ids + # This routine computes only the upper-triangular part of the hadamard sum (A .* F). + # We avoid computing the lower-triangular part, and instead accumulate those contributions + # while computing the upper-triangular part (using the fact that A is skew-symmetric and F + # is symmetric). 
+ if j > i + u_j = u[j] + AF_ij = 2 * A[i, j] * + volume_flux(u_i, u_j, orientation_or_normal_direction, + equations) + du_i = du_i + AF_ij + du[j] = du[j] - AF_ij + end + end + du[i] = du_i end - du[i] = du_i - end end # Version for dense operators and non-symmetric fluxes @inline function hadamard_sum!(du, A, flux_is_symmetric::False, volume_flux, orientation::Integer, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - u_j = u[j] - f_ij = volume_flux(u_i, u_j, orientation, equations) - du_i = du_i + 2 * A[i,j] * f_ij + row_ids, col_ids = axes(A) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for j in col_ids + u_j = u[j] + f_ij = volume_flux(u_i, u_j, orientation, equations) + du_i = du_i + 2 * A[i, j] * f_ij + end + du[i] = du_i end - du[i] = du_i - end end @inline function hadamard_sum!(du, A, flux_is_symmetric::False, volume_flux, normal_direction::AbstractVector, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - u_j = u[j] - # The `normal_direction::AbstractVector` has to be passed in twice. - # This is because on curved meshes, nonconservative fluxes are - # evaluated using both the normal and its average at interfaces. - f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) - du_i = du_i + 2 * A[i,j] * f_ij + row_ids, col_ids = axes(A) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for j in col_ids + u_j = u[j] + # The `normal_direction::AbstractVector` has to be passed in twice. + # This is because on curved meshes, nonconservative fluxes are + # evaluated using both the normal and its average at interfaces. + f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) + du_i = du_i + 2 * A[i, j] * f_ij + end + du[i] = du_i end - du[i] = du_i - end end # Version for sparse operators and symmetric fluxes -@inline function hadamard_sum!(du, A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC}, +@inline function hadamard_sum!(du, + A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC + }, flux_is_symmetric::True, volume_flux, orientation_or_normal_direction, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - j = rows[id] - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - A_ij = vals[id] - AF_ij = 2 * A_ij * volume_flux(u_i, u_j, orientation_or_normal_direction, equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end + A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids = axes(A, 2) + rows = rowvals(A_base) + vals = nonzeros(A_base) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for id in nzrange(A_base, i) + j = rows[id] + # This routine computes only the upper-triangular part of the hadamard sum (A .* F). + # We avoid computing the lower-triangular part, and instead accumulate those contributions + # while computing the upper-triangular part (using the fact that A is skew-symmetric and F + # is symmetric). 
+ if j > i + u_j = u[j] + A_ij = vals[id] + AF_ij = 2 * A_ij * + volume_flux(u_i, u_j, orientation_or_normal_direction, + equations) + du_i = du_i + AF_ij + du[j] = du[j] - AF_ij + end + end + du[i] = du_i end - du[i] = du_i - end end # Version for sparse operators and symmetric fluxes with curved meshes -@inline function hadamard_sum!(du, A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC}, +@inline function hadamard_sum!(du, + A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC + }, flux_is_symmetric::True, volume_flux, normal_directions::AbstractVector{<:AbstractVector}, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - j = rows[id] - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - A_ij = vals[id] - - # provably entropy stable de-aliasing of geometric terms - normal_direction = 0.5 * (getindex.(normal_directions, i) + getindex.(normal_directions, j)) - - AF_ij = 2 * A_ij * volume_flux(u_i, u_j, normal_direction, equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end + A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids = axes(A, 2) + rows = rowvals(A_base) + vals = nonzeros(A_base) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for id in nzrange(A_base, i) + j = rows[id] + # This routine computes only the upper-triangular part of the hadamard sum (A .* F). + # We avoid computing the lower-triangular part, and instead accumulate those contributions + # while computing the upper-triangular part (using the fact that A is skew-symmetric and F + # is symmetric). + if j > i + u_j = u[j] + A_ij = vals[id] + + # provably entropy stable de-aliasing of geometric terms + normal_direction = 0.5 * (getindex.(normal_directions, i) + + getindex.(normal_directions, j)) + + AF_ij = 2 * A_ij * volume_flux(u_i, u_j, normal_direction, equations) + du_i = du_i + AF_ij + du[j] = du[j] - AF_ij + end + end + du[i] = du_i end - du[i] = du_i - end end # TODO: DGMulti. Fix for curved meshes. # Version for sparse operators and non-symmetric fluxes -@inline function hadamard_sum!(du, A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC}, +@inline function hadamard_sum!(du, + A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC + }, flux_is_symmetric::False, volume_flux, normal_direction::AbstractVector, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - A_ij = vals[id] - j = rows[id] - # The `normal_direction::AbstractVector` has to be passed in twice. - # This is because on curved meshes, nonconservative fluxes are - # evaluated using both the normal and its average at interfaces. 
- u_j = u[j] - f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) - du_i = du_i + 2 * A_ij * f_ij + A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids = axes(A, 2) + rows = rowvals(A_base) + vals = nonzeros(A_base) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for id in nzrange(A_base, i) + A_ij = vals[id] + j = rows[id] + # The `normal_direction::AbstractVector` has to be passed in twice. + # This is because on curved meshes, nonconservative fluxes are + # evaluated using both the normal and its average at interfaces. + u_j = u[j] + f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) + du_i = du_i + 2 * A_ij * f_ij + end + du[i] = du_i end - du[i] = du_i - end end - # For DGMulti implementations, we construct "physical" differentiation operators by taking linear # combinations of reference differentiation operators scaled by geometric change of variables terms. # We use a lazy evaluation of physical differentiation operators, so that we can compute linear @@ -183,36 +194,49 @@ end @inline function build_lazy_physical_derivative(element, orientation, mesh::DGMultiMesh{1}, dg, cache, operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ = mesh.md - # ignore orientation - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1,element],)) + @unpack Qrst_skew = cache + @unpack rxJ = mesh.md + # ignore orientation + return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1, element],)) end @inline function build_lazy_physical_derivative(element, orientation, mesh::DGMultiMesh{2}, dg, cache, operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ, sxJ, ryJ, syJ = mesh.md - if orientation == 1 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1,element], sxJ[1,element])) - else # if orientation == 2 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (ryJ[1,element], syJ[1,element])) - end + @unpack Qrst_skew = cache + @unpack rxJ, sxJ, ryJ, syJ = mesh.md + if orientation == 1 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (rxJ[1, element], sxJ[1, element])) + else # if orientation == 2 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (ryJ[1, element], syJ[1, element])) + end end @inline function build_lazy_physical_derivative(element, orientation, mesh::DGMultiMesh{3}, dg, cache, operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ, sxJ, txJ, ryJ, syJ, tyJ, rzJ, szJ, tzJ = mesh.md - if orientation == 1 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1, element], sxJ[1, element], txJ[1, element])) - elseif orientation == 2 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (ryJ[1, element], syJ[1, element], tyJ[1, element])) - else # if orientation == 3 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rzJ[1, element], szJ[1, element], tzJ[1, element])) - end + @unpack Qrst_skew = cache + @unpack rxJ, sxJ, txJ, ryJ, syJ, tyJ, rzJ, szJ, tzJ = mesh.md + if orientation == 1 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (rxJ[1, element], sxJ[1, element], + txJ[1, element])) + elseif orientation == 2 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (ryJ[1, element], syJ[1, element], + tyJ[1, element])) + else # if orientation == 3 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (rzJ[1, element], szJ[1, element], + tzJ[1, element])) + end end # Return the contravariant basis vector 
corresponding to the Cartesian @@ -222,79 +246,84 @@ end # and jth reference coordinate, respectively. These are geometric terms which # appear when using the chain rule to compute physical derivatives as a linear # combination of reference derivatives. -@inline function get_contravariant_vector(element, orientation, mesh::DGMultiMesh{NDIMS}, cache) where {NDIMS} - # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ], so that this will return - # SVector{2}(rxJ[1, element], ryJ[1, element]) in 2D. - - # assumes geometric terms are constant on each element - dxidxhatj = mesh.md.rstxyzJ - return SVector{NDIMS}(getindex.(dxidxhatj[:, orientation], 1, element)) +@inline function get_contravariant_vector(element, orientation, + mesh::DGMultiMesh{NDIMS}, cache) where {NDIMS} + # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ], so that this will return + # SVector{2}(rxJ[1, element], ryJ[1, element]) in 2D. + + # assumes geometric terms are constant on each element + dxidxhatj = mesh.md.rstxyzJ + return SVector{NDIMS}(getindex.(dxidxhatj[:, orientation], 1, element)) end -@inline function get_contravariant_vector(element, orientation, mesh::DGMultiMesh{NDIMS, NonAffine}, cache) where {NDIMS} - # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ] +@inline function get_contravariant_vector(element, orientation, + mesh::DGMultiMesh{NDIMS, NonAffine}, + cache) where {NDIMS} + # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ] - # assumes geometric terms vary spatially over each element - (; dxidxhatj) = cache - return SVector{NDIMS}(view.(dxidxhatj[:, orientation], :, element)) + # assumes geometric terms vary spatially over each element + (; dxidxhatj) = cache + return SVector{NDIMS}(view.(dxidxhatj[:, orientation], :, element)) end # use hybridized SBP operators for general flux differencing schemes. function compute_flux_differencing_SBP_matrices(dg::DGMulti) - compute_flux_differencing_SBP_matrices(dg, has_sparse_operators(dg)) + compute_flux_differencing_SBP_matrices(dg, has_sparse_operators(dg)) end function compute_flux_differencing_SBP_matrices(dg::DGMulti, sparse_operators) - rd = dg.basis - Qrst_hybridized, VhP, Ph = StartUpDG.hybridized_SBP_operators(rd) - Qrst_skew = map(A -> 0.5 * (A - A'), Qrst_hybridized) - if sparse_operators == true - Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) - end - return Qrst_skew, VhP, Ph + rd = dg.basis + Qrst_hybridized, VhP, Ph = StartUpDG.hybridized_SBP_operators(rd) + Qrst_skew = map(A -> 0.5 * (A - A'), Qrst_hybridized) + if sparse_operators == true + Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) + end + return Qrst_skew, VhP, Ph end # use traditional multidimensional SBP operators for SBP approximation types. 
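To make the skew-symmetric splitting above concrete: for a 1D SBP operator, Q = M * D satisfies Q + Q' = B with B = diagm([-1, 0, ..., 0, 1]), so the skew part 0.5 * (Q - Q') retains all interior coupling while the boundary part is handled by surface terms. A small sketch using the degree-2 Legendre-Gauss-Lobatto operator on [-1, 1] (values written out by hand here, not taken from StartUpDG.jl):

using LinearAlgebra, SparseArrays

M = Diagonal([1 / 3, 4 / 3, 1 / 3])    # LGL quadrature weights, polydeg 2
D = [-1.5  2.0  -0.5;
     -0.5  0.0   0.5;
      0.5 -2.0   1.5]                  # differentiation matrix on nodes -1, 0, 1
Q = M * D

@assert Q + Q' ≈ Diagonal([-1.0, 0.0, 1.0])  # SBP property: Q + Q' = B

Q_skew = 0.5 * (Q - Q')                # skew part, as formed in the code above
@assert Q_skew ≈ -Q_skew'

# the sparsification step mirrors the one above: drop near-zero entries and
# store the result as the adjoint of a SparseMatrixCSC (CSR-like row access)
Q_skew_sparse = droptol!(sparse(Q_skew'), 100 * eps(eltype(Q_skew)))'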
-function compute_flux_differencing_SBP_matrices(dg::DGMultiFluxDiffSBP, sparse_operators) - rd = dg.basis - @unpack M, Drst, Pq = rd - Qrst = map(D -> M * D, Drst) - Qrst_skew = map(A -> 0.5 * (A - A'), Qrst) - if sparse_operators == true - Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) - end - return Qrst_skew +function compute_flux_differencing_SBP_matrices(dg::DGMultiFluxDiffSBP, + sparse_operators) + rd = dg.basis + @unpack M, Drst, Pq = rd + Qrst = map(D -> M * D, Drst) + Qrst_skew = map(A -> 0.5 * (A - A'), Qrst) + if sparse_operators == true + Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) + end + return Qrst_skew end - # For flux differencing SBP-type approximations, store solutions in Matrix{SVector{nvars}}. # This results in a slight speedup for `calc_volume_integral!`. function allocate_nested_array(uEltype, nvars, array_dimensions, dg::DGMultiFluxDiffSBP) - return zeros(SVector{nvars, uEltype}, array_dimensions...) + return zeros(SVector{nvars, uEltype}, array_dimensions...) end -function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiffSBP, RealT, uEltype) - - rd = dg.basis - md = mesh.md +function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiffSBP, RealT, + uEltype) + rd = dg.basis + md = mesh.md - # for use with flux differencing schemes - Qrst_skew = compute_flux_differencing_SBP_matrices(dg) + # for use with flux differencing schemes + Qrst_skew = compute_flux_differencing_SBP_matrices(dg) - # Todo: DGMulti. Factor common storage into a struct (MeshDataCache?) for reuse across solvers? - # storage for volume quadrature values, face quadrature values, flux values - nvars = nvariables(equations) - u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) - u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators + # Todo: DGMulti. Factor common storage into a struct (MeshDataCache?) for reuse across solvers? 
+ # storage for volume quadrature values, face quadrature values, flux values + nvars = nvariables(equations) + u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) + u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators - local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] + local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] - # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing - fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, rd.Nq) for _ in 1:Threads.nthreads()] + # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing + fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, rd.Nq) + for _ in 1:Threads.nthreads()] - return (; md, Qrst_skew, dxidxhatj = md.rstxyzJ, + return (; md, Qrst_skew, dxidxhatj = md.rstxyzJ, invJ = inv.(md.J), lift_scalings, inv_wq = inv.(rd.wq), u_values, u_face_values, flux_face_values, local_values_threaded, fluxdiff_local_threaded) @@ -302,93 +331,101 @@ end # most general create_cache: works for `DGMultiFluxDiff{<:Polynomial}` function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiff, RealT, uEltype) - - rd = dg.basis - @unpack md = mesh - - Qrst_skew, VhP, Ph = compute_flux_differencing_SBP_matrices(dg) - - # temp storage for entropy variables at volume quad points - nvars = nvariables(equations) - entropy_var_values = allocate_nested_array(uEltype, nvars, (rd.Nq, md.num_elements), dg) - - # storage for all quadrature points (concatenated volume / face quadrature points) - num_quad_points_total = rd.Nq + rd.Nfq - entropy_projected_u_values = allocate_nested_array(uEltype, nvars, (num_quad_points_total, md.num_elements), dg) - projected_entropy_var_values = allocate_nested_array(uEltype, nvars, (num_quad_points_total, md.num_elements), dg) - - # For this specific solver, `prolong2interfaces` will not be used anymore. - # Instead, this step is also performed in `entropy_projection!`. Thus, we set - # `u_face_values` as a `view` into `entropy_projected_u_values`. We do not do - # the same for `u_values` since we will use that with LoopVectorization, which - # cannot handle such views as of v0.12.66, the latest version at the time of writing. - u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) - u_face_values = view(entropy_projected_u_values, rd.Nq+1:num_quad_points_total, :) - flux_face_values = similar(u_face_values) - - # local storage for interface fluxes, rhs, and source - local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] - - # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing - # The result is then transferred to rhs_local_threaded::StructArray{<:SVector} before - # projecting it and storing it into `du`. 
- fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, num_quad_points_total) for _ in 1:Threads.nthreads()] - rhs_local_threaded = [allocate_nested_array(uEltype, nvars, (num_quad_points_total,), dg) for _ in 1:Threads.nthreads()] - - # interpolate geometric terms to both quadrature and face values for curved meshes - (; Vq, Vf) = dg.basis - interpolated_geometric_terms = map(x -> [Vq; Vf] * x, mesh.md.rstxyzJ) - J = rd.Vq * md.J - - return (; md, Qrst_skew, VhP, Ph, + rd = dg.basis + @unpack md = mesh + + Qrst_skew, VhP, Ph = compute_flux_differencing_SBP_matrices(dg) + + # temp storage for entropy variables at volume quad points + nvars = nvariables(equations) + entropy_var_values = allocate_nested_array(uEltype, nvars, (rd.Nq, md.num_elements), + dg) + + # storage for all quadrature points (concatenated volume / face quadrature points) + num_quad_points_total = rd.Nq + rd.Nfq + entropy_projected_u_values = allocate_nested_array(uEltype, nvars, + (num_quad_points_total, + md.num_elements), dg) + projected_entropy_var_values = allocate_nested_array(uEltype, nvars, + (num_quad_points_total, + md.num_elements), dg) + + # For this specific solver, `prolong2interfaces` will not be used anymore. + # Instead, this step is also performed in `entropy_projection!`. Thus, we set + # `u_face_values` as a `view` into `entropy_projected_u_values`. We do not do + # the same for `u_values` since we will use that with LoopVectorization, which + # cannot handle such views as of v0.12.66, the latest version at the time of writing. + u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) + u_face_values = view(entropy_projected_u_values, (rd.Nq + 1):num_quad_points_total, + :) + flux_face_values = similar(u_face_values) + + # local storage for interface fluxes, rhs, and source + local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] + + # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing + # The result is then transferred to rhs_local_threaded::StructArray{<:SVector} before + # projecting it and storing it into `du`. + fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, num_quad_points_total) + for _ in 1:Threads.nthreads()] + rhs_local_threaded = [allocate_nested_array(uEltype, nvars, + (num_quad_points_total,), dg) + for _ in 1:Threads.nthreads()] + + # interpolate geometric terms to both quadrature and face values for curved meshes + (; Vq, Vf) = dg.basis + interpolated_geometric_terms = map(x -> [Vq; Vf] * x, mesh.md.rstxyzJ) + J = rd.Vq * md.J + + return (; md, Qrst_skew, VhP, Ph, invJ = inv.(J), dxidxhatj = interpolated_geometric_terms, - entropy_var_values, projected_entropy_var_values, entropy_projected_u_values, + entropy_var_values, projected_entropy_var_values, + entropy_projected_u_values, u_values, u_face_values, flux_face_values, local_values_threaded, fluxdiff_local_threaded, rhs_local_threaded) end - # TODO: DGMulti. Address hard-coding of `entropy2cons!` and `cons2entropy!` for this function. 
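Conceptually, the entropy projection below maps the solution to entropy variables at volume quadrature points, projects those back onto the polynomial space while evaluating at the combined volume and face points, and converts back to conservative variables. A minimal sketch with hypothetical dense operators (`Vq` evaluates nodal coefficients at volume quadrature, `VhP` fuses the quadrature-based L2 projection with evaluation at volume plus face points):

function entropy_projection_sketch(u, Vq, VhP, cons2entropy, entropy2cons)
    u_values = Vq * u                      # solution at volume quadrature
    w_values = cons2entropy.(u_values)     # entropy variables, pointwise
    w_projected = VhP * w_values           # project + evaluate at vol/face pts
    return entropy2cons.(w_projected)      # entropy-projected solution values
end

# e.g., for the quadratic entropy S(u) = u^2 / 2 the entropy variable is
# w = u, so both maps are the identity (placeholder operator sizes):
entropy_projection_sketch(randn(3), rand(4, 3), rand(7, 4), identity, identity)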
function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, dg::DGMulti) + rd = dg.basis + @unpack Vq = rd + @unpack VhP, entropy_var_values, u_values = cache + @unpack projected_entropy_var_values, entropy_projected_u_values = cache - rd = dg.basis - @unpack Vq = rd - @unpack VhP, entropy_var_values, u_values = cache - @unpack projected_entropy_var_values, entropy_projected_u_values = cache - - apply_to_each_field(mul_by!(Vq), u_values, u) + apply_to_each_field(mul_by!(Vq), u_values, u) - cons2entropy!(entropy_var_values, u_values, equations) + cons2entropy!(entropy_var_values, u_values, equations) - # "VhP" fuses the projection "P" with interpolation to volume and face quadrature "Vh" - apply_to_each_field(mul_by!(VhP), projected_entropy_var_values, entropy_var_values) + # "VhP" fuses the projection "P" with interpolation to volume and face quadrature "Vh" + apply_to_each_field(mul_by!(VhP), projected_entropy_var_values, entropy_var_values) - entropy2cons!(entropy_projected_u_values, projected_entropy_var_values, equations) - return nothing + entropy2cons!(entropy_projected_u_values, projected_entropy_var_values, equations) + return nothing end @inline function cons2entropy!(entropy_var_values::StructArray, - u_values ::StructArray, + u_values::StructArray, equations) - @threaded for i in eachindex(u_values) - entropy_var_values[i] = cons2entropy(u_values[i], equations) - end + @threaded for i in eachindex(u_values) + entropy_var_values[i] = cons2entropy(u_values[i], equations) + end end -@inline function entropy2cons!(entropy_projected_u_values ::StructArray, +@inline function entropy2cons!(entropy_projected_u_values::StructArray, projected_entropy_var_values::StructArray, equations) - @threaded for i in eachindex(projected_entropy_var_values) - entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) - end + @threaded for i in eachindex(projected_entropy_var_values) + entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], + equations) + end end - # Trait-like system to dispatch based on whether or not the SBP operators are sparse. # Designed to be extendable to include specialized `approximation_types` too. @inline function has_sparse_operators(dg::DGMultiFluxDiff) - rd = dg.basis - return has_sparse_operators(rd.element_type, rd.approximation_type) + rd = dg.basis + return has_sparse_operators(rd.element_type, rd.approximation_type) end # General fallback for DGMulti solvers: @@ -400,15 +437,24 @@ end # For traditional SBP operators on triangles, the operators are fully dense. We avoid using # sum factorization here, which is slower for fully dense matrices. -@inline has_sparse_operators(::Union{Tri, Tet}, approx_type::AT) where {AT <: SBP} = False() +@inline function has_sparse_operators(::Union{Tri, Tet}, + approx_type::AT) where {AT <: SBP} + False() +end # SBP/GaussSBP operators on quads/hexes use tensor-product operators. Thus, sum factorization is # more efficient and we use the sparsity structure. 
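The `True()` / `False()` values returned by these methods (including the Quad/Hex specializations just below) are compile-time booleans from Static.jl, so downstream kernels choose between sum factorization and dense matrix products by dispatch rather than by a run-time branch. A hypothetical mini-example of the pattern:

using Static: True, False

# illustration only: dispatch on the operator-sparsity trait
volume_kernel_name(::True) = "sum factorization over sparse tensor-product ops"
volume_kernel_name(::False) = "dense matrix products"

volume_kernel_name(True())   # what a Quad/Hex SBP solver would select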
-@inline has_sparse_operators(::Union{Quad, Hex}, approx_type::AT) where {AT <: SBP} = True() +@inline function has_sparse_operators(::Union{Quad, Hex}, + approx_type::AT) where {AT <: SBP} + True() +end @inline has_sparse_operators(::Union{Quad, Hex}, approx_type::GaussSBP) = True() # FD SBP methods have sparse operators -@inline has_sparse_operators(::Union{Line, Quad, Hex}, approx_type::AbstractDerivativeOperator) = True() +@inline function has_sparse_operators(::Union{Line, Quad, Hex}, + approx_type::AbstractDerivativeOperator) + True() +end # Computes flux differencing contribution from each Cartesian direction over a single element. # For dense operators, we do not use sum factorization. @@ -416,35 +462,35 @@ end has_nonconservative_terms::False, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) - - for dim in eachdim(mesh) - Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) - # True() indicates the volume flux is symmetric - hadamard_sum!(fluxdiff_local, Qi_skew, - True(), volume_flux, - dim, u_local, equations) - end + for dim in eachdim(mesh) + Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) + # True() indicates the volume flux is symmetric + hadamard_sum!(fluxdiff_local, Qi_skew, + True(), volume_flux, + dim, u_local, equations) + end end @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, has_nonconservative_terms::True, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) - flux_conservative, flux_nonconservative = volume_flux - for dim in eachdim(mesh) - Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) - # True() indicates the flux is symmetric. - hadamard_sum!(fluxdiff_local, Qi_skew, - True(), flux_conservative, - dim, u_local, equations) - - # The final argument .5 scales the operator by 1/2 for the nonconservative terms. - half_Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache, 0.5) - # False() indicates the flux is non-symmetric. - hadamard_sum!(fluxdiff_local, half_Qi_skew, - False(), flux_nonconservative, - dim, u_local, equations) - end + flux_conservative, flux_nonconservative = volume_flux + for dim in eachdim(mesh) + Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) + # True() indicates the flux is symmetric. + hadamard_sum!(fluxdiff_local, Qi_skew, + True(), flux_conservative, + dim, u_local, equations) + + # The final argument .5 scales the operator by 1/2 for the nonconservative terms. + half_Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, + cache, 0.5) + # False() indicates the flux is non-symmetric. + hadamard_sum!(fluxdiff_local, half_Qi_skew, + False(), flux_nonconservative, + dim, u_local, equations) + end end # When the operators are sparse, we use the sum-factorization approach to @@ -453,54 +499,54 @@ end has_nonconservative_terms::False, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) - @unpack Qrst_skew = cache - for dim in eachdim(mesh) - # There are two ways to write this flux differencing discretization on affine meshes. - # - # 1. Use numerical fluxes in Cartesian directions and sum up the discrete derivative - # operators per coordinate direction accordingly. - # 2. Use discrete derivative operators per coordinate direction and corresponding - # numerical fluxes in arbitrary (non-Cartesian) space directions. 
- # - # The first option makes it necessary to sum up the individual sparsity - # patterns of each reference coordinate direction. On tensor-product - # elements such as `Quad()` or `Hex()` elements, this increases the number of - # potentially expensive numerical flux evaluations by a factor of `ndims(mesh)`. - # Thus, we use the second option below (which basically corresponds to the - # well-known sum factorization on tensor product elements). - # Note that there is basically no difference for dense derivative operators. - normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) - Q_skew = Qrst_skew[dim] - - # True() indicates the flux is symmetric - hadamard_sum!(fluxdiff_local, Q_skew, - True(), volume_flux, - normal_direction, u_local, equations) - end + @unpack Qrst_skew = cache + for dim in eachdim(mesh) + # There are two ways to write this flux differencing discretization on affine meshes. + # + # 1. Use numerical fluxes in Cartesian directions and sum up the discrete derivative + # operators per coordinate direction accordingly. + # 2. Use discrete derivative operators per coordinate direction and corresponding + # numerical fluxes in arbitrary (non-Cartesian) space directions. + # + # The first option makes it necessary to sum up the individual sparsity + # patterns of each reference coordinate direction. On tensor-product + # elements such as `Quad()` or `Hex()` elements, this increases the number of + # potentially expensive numerical flux evaluations by a factor of `ndims(mesh)`. + # Thus, we use the second option below (which basically corresponds to the + # well-known sum factorization on tensor product elements). + # Note that there is basically no difference for dense derivative operators. + normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) + Q_skew = Qrst_skew[dim] + + # True() indicates the flux is symmetric + hadamard_sum!(fluxdiff_local, Q_skew, + True(), volume_flux, + normal_direction, u_local, equations) + end end @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, has_nonconservative_terms::True, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) - @unpack Qrst_skew = cache - flux_conservative, flux_nonconservative = volume_flux - for dim in eachdim(mesh) - normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) - Q_skew = Qrst_skew[dim] - - # True() indicates the flux is symmetric - hadamard_sum!(fluxdiff_local, Q_skew, - True(), flux_conservative, - normal_direction, u_local, equations) - - # We scale the operator by 1/2 for the nonconservative terms. - half_Q_skew = LazyMatrixLinearCombo((Q_skew, ), (0.5, )) - # False() indicates the flux is non-symmetric - hadamard_sum!(fluxdiff_local, half_Q_skew, - False(), flux_nonconservative, - normal_direction, u_local, equations) - end + @unpack Qrst_skew = cache + flux_conservative, flux_nonconservative = volume_flux + for dim in eachdim(mesh) + normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) + Q_skew = Qrst_skew[dim] + + # True() indicates the flux is symmetric + hadamard_sum!(fluxdiff_local, Q_skew, + True(), flux_conservative, + normal_direction, u_local, equations) + + # We scale the operator by 1/2 for the nonconservative terms. 
+ half_Q_skew = LazyMatrixLinearCombo((Q_skew,), (0.5,)) + # False() indicates the flux is non-symmetric + hadamard_sum!(fluxdiff_local, half_Q_skew, + False(), flux_nonconservative, + normal_direction, u_local, equations) + end end # calculates volume integral for <:Polynomial approximation types. We @@ -510,101 +556,109 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral, dg::DGMultiFluxDiff, cache) - - @unpack entropy_projected_u_values, Ph = cache - @unpack fluxdiff_local_threaded, rhs_local_threaded = cache - - @threaded for e in eachelement(mesh, dg, cache) - fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(entropy_projected_u_values, :, e) - - local_flux_differencing!(fluxdiff_local, u_local, e, - have_nonconservative_terms, volume_integral.volume_flux, - has_sparse_operators(dg), - mesh, equations, dg, cache) - - # convert fluxdiff_local::Vector{<:SVector} to StructArray{<:SVector} for faster - # apply_to_each_field performance. - rhs_local = rhs_local_threaded[Threads.threadid()] - for i in Base.OneTo(length(fluxdiff_local)) - rhs_local[i] = fluxdiff_local[i] + @unpack entropy_projected_u_values, Ph = cache + @unpack fluxdiff_local_threaded, rhs_local_threaded = cache + + @threaded for e in eachelement(mesh, dg, cache) + fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] + fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + u_local = view(entropy_projected_u_values, :, e) + + local_flux_differencing!(fluxdiff_local, u_local, e, + have_nonconservative_terms, + volume_integral.volume_flux, + has_sparse_operators(dg), + mesh, equations, dg, cache) + + # convert fluxdiff_local::Vector{<:SVector} to StructArray{<:SVector} for faster + # apply_to_each_field performance. + rhs_local = rhs_local_threaded[Threads.threadid()] + for i in Base.OneTo(length(fluxdiff_local)) + rhs_local[i] = fluxdiff_local[i] + end + apply_to_each_field(mul_by_accum!(Ph), view(du, :, e), rhs_local) end - apply_to_each_field(mul_by_accum!(Ph), view(du, :, e), rhs_local) - end end function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral, dg::DGMultiFluxDiffSBP, cache) - - @unpack fluxdiff_local_threaded, inv_wq = cache - - @threaded for e in eachelement(mesh, dg, cache) - fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(u, :, e) - - local_flux_differencing!(fluxdiff_local, u_local, e, - have_nonconservative_terms, volume_integral.volume_flux, - has_sparse_operators(dg), - mesh, equations, dg, cache) - - for i in each_quad_node(mesh, dg, cache) - du[i, e] = du[i, e] + fluxdiff_local[i] * inv_wq[i] + @unpack fluxdiff_local_threaded, inv_wq = cache + + @threaded for e in eachelement(mesh, dg, cache) + fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] + fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + u_local = view(u, :, e) + + local_flux_differencing!(fluxdiff_local, u_local, e, + have_nonconservative_terms, + volume_integral.volume_flux, + has_sparse_operators(dg), + mesh, equations, dg, cache) + + for i in each_quad_node(mesh, dg, cache) + du[i, e] = du[i, e] + fluxdiff_local[i] * inv_wq[i] + end end - end end - # Specialize since `u_values` isn't computed for DGMultiFluxDiffSBP solvers. 
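Since SBP-type DGMulti solvers collocate solution and quadrature nodes, the source term can be evaluated pointwise at each quadrature node, which is what the function below does. A self-contained sketch of that update (hypothetical names; `s` stands in for the user-supplied `source_terms`):

# pointwise source accumulation at collocated quadrature nodes
function add_sources!(du::AbstractMatrix, u::AbstractMatrix, x::AbstractMatrix,
                      t, s)
    for e in axes(u, 2), i in axes(u, 1)   # loop over elements, then nodes
        du[i, e] += s(u[i, e], x[i, e], t)
    end
    return du
end

du = zeros(3, 2)
u = ones(3, 2)
x = [0.0 1.0; 0.5 1.5; 1.0 2.0]            # node coordinates per element
add_sources!(du, u, x, 0.0, (u, x, t) -> -u + x)  # hypothetical source term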
function calc_sources!(du, u, t, source_terms, mesh, equations, dg::DGMultiFluxDiffSBP, cache) - md = mesh.md + md = mesh.md - @threaded for e in eachelement(mesh, dg, cache) - for i in each_quad_node(mesh, dg, cache) - du[i, e] += source_terms(u[i, e], SVector(getindex.(md.xyzq, i, e)), t, equations) + @threaded for e in eachelement(mesh, dg, cache) + for i in each_quad_node(mesh, dg, cache) + du[i, e] += source_terms(u[i, e], SVector(getindex.(md.xyzq, i, e)), t, + equations) + end end - end end - # Specializes on Polynomial (e.g., modal) DG methods with a flux differencing volume integral, e.g., # an entropy conservative/stable discretization. For modal DG schemes, an extra `entropy_projection!` # is required (see https://doi.org/10.1016/j.jcp.2018.02.033, Section 4.3). # Also called by DGMultiFluxDiff{<:GaussSBP} solvers. function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff, cache) where {Source, BC} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # this function evaluates the solution at volume and face quadrature points (which was previously - # done in `prolong2interfaces` and `calc_volume_integral`) - @trixi_timeit timer() "entropy_projection!" entropy_projection!(cache, u, mesh, equations, dg) + # this function evaluates the solution at volume and face quadrature points (which was previously + # done in `prolong2interfaces` and `calc_volume_integral`) + @trixi_timeit timer() "entropy_projection!" begin + entropy_projection!(cache, u, mesh, equations, dg) + end - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, have_nonconservative_terms(equations), + equations, + dg.volume_integral, dg, cache) + end - # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl - @trixi_timeit timer() "interface flux" calc_interface_flux!(cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), - equations, dg) + # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, dg.surface_integral, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, equations, - dg.surface_integral, dg, cache) + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) - @trixi_timeit timer() "source terms" calc_sources!(du, u, t, source_terms, - mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, mesh, equations, dg, cache) + end - return nothing + return nothing end # Specializes on SBP (e.g., 
nodal/collocation) DG methods with a flux differencing volume @@ -614,36 +668,40 @@ end function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiffSBP, cache) where {BC, Source} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + @trixi_timeit timer() "volume integral" calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), + equations, + dg.volume_integral, + dg, cache) + + @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!(cache, u, mesh, + equations, + dg.surface_integral, + dg) + + @trixi_timeit timer() "interface flux" calc_interface_flux!(cache, + dg.surface_integral, + mesh, + have_nonconservative_terms(equations), + equations, dg) + + @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, + boundary_conditions, mesh, + have_nonconservative_terms(equations), + equations, dg) - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), equations, dg) - - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) - - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) + @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, + equations, + dg.surface_integral, + dg, cache) - @trixi_timeit timer() "Jacobian" invert_jacobian!( - du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" calc_sources!(du, u, t, source_terms, mesh, + equations, dg, cache) - return nothing + return nothing end - - - end # @muladd diff --git a/src/solvers/dgmulti/flux_differencing_compressible_euler.jl b/src/solvers/dgmulti/flux_differencing_compressible_euler.jl index 530c2b23230..70a29bc73f2 100644 --- a/src/solvers/dgmulti/flux_differencing_compressible_euler.jl +++ b/src/solvers/dgmulti/flux_differencing_compressible_euler.jl @@ -3,8 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - +#! format: noindent # TODO: Upstream, LoopVectorization # At the time of writing, LoopVectorization.jl cannot handle this kind of @@ -12,171 +11,171 @@ # `entropy2cons`. Thus, we need to insert the physics directly here to # get a significant runtime performance improvement. function cons2entropy!(entropy_var_values::StructArray, - u_values ::StructArray, + u_values::StructArray, equations::CompressibleEulerEquations2D) - # The following is semantically equivalent to - # @threaded for i in eachindex(u_values) - # entropy_var_values[i] = cons2entropy(u_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. 
- @unpack gamma, inv_gamma_minus_one = equations - - rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(u_values) - w1_values, w2_values, w3_values, w4_values = StructArrays.components(entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values) - rho = rho_values[i] - rho_v1 = rho_v1_values[i] - rho_v2 = rho_v2_values[i] - rho_e = rho_e_values[i] - - # The following is basically the same code as in `cons2entropy` - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v_square = v1^2 + v2^2 - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - rho_p = rho / p - - w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square - w2_values[i] = rho_p * v1 - w3_values[i] = rho_p * v2 - w4_values[i] = -rho_p - end + # The following is semantically equivalent to + # @threaded for i in eachindex(u_values) + # entropy_var_values[i] = cons2entropy(u_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + + rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(u_values) + w1_values, w2_values, w3_values, w4_values = StructArrays.components(entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_e_values, + w1_values, w2_values, w3_values, w4_values) + rho = rho_values[i] + rho_v1 = rho_v1_values[i] + rho_v2 = rho_v2_values[i] + rho_e = rho_e_values[i] + + # The following is basically the same code as in `cons2entropy` + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v_square = v1^2 + v2^2 + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) + rho_p = rho / p + + w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square + w2_values[i] = rho_p * v1 + w3_values[i] = rho_p * v2 + w4_values[i] = -rho_p + end end -function entropy2cons!(entropy_projected_u_values ::StructArray, +function entropy2cons!(entropy_projected_u_values::StructArray, projected_entropy_var_values::StructArray, equations::CompressibleEulerEquations2D) - # The following is semantically equivalent to - # @threaded for i in eachindex(projected_entropy_var_values) - # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. - @unpack gamma, inv_gamma_minus_one = equations - gamma_minus_one = gamma - 1 - - rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(entropy_projected_u_values) - w1_values, w2_values, w3_values, w4_values = StructArrays.components(projected_entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values) - - # The following is basically the same code as in `entropy2cons` - # Convert to entropy `-rho * s` used by - # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - # instead of `-rho * s / (gamma - 1)` - w1 = gamma_minus_one * w1_values[i] - w2 = gamma_minus_one * w2_values[i] - w3 = gamma_minus_one * w3_values[i] - w4 = gamma_minus_one * w4_values[i] - - # s = specific entropy, eq. 
(53) - s = gamma - w1 + (w2^2 + w3^2) / (2 * w4) - - # eq. (52) - rho_iota = (gamma_minus_one / (-w4)^gamma)^(inv_gamma_minus_one) * exp(-s * inv_gamma_minus_one) - - # eq. (51) - rho_values[i] = -rho_iota * w4 - rho_v1_values[i] = rho_iota * w2 - rho_v2_values[i] = rho_iota * w3 - rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2) / (2 * w4)) - end + # The following is semantically equivalent to + # @threaded for i in eachindex(projected_entropy_var_values) + # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + gamma_minus_one = gamma - 1 + + rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(entropy_projected_u_values) + w1_values, w2_values, w3_values, w4_values = StructArrays.components(projected_entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_e_values, + w1_values, w2_values, w3_values, w4_values) + + # The following is basically the same code as in `entropy2cons` + # Convert to entropy `-rho * s` used by + # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + # instead of `-rho * s / (gamma - 1)` + w1 = gamma_minus_one * w1_values[i] + w2 = gamma_minus_one * w2_values[i] + w3 = gamma_minus_one * w3_values[i] + w4 = gamma_minus_one * w4_values[i] + + # s = specific entropy, eq. (53) + s = gamma - w1 + (w2^2 + w3^2) / (2 * w4) + + # eq. (52) + rho_iota = (gamma_minus_one / (-w4)^gamma)^(inv_gamma_minus_one) * + exp(-s * inv_gamma_minus_one) + + # eq. (51) + rho_values[i] = -rho_iota * w4 + rho_v1_values[i] = rho_iota * w2 + rho_v2_values[i] = rho_iota * w3 + rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2) / (2 * w4)) + end end - function cons2entropy!(entropy_var_values::StructArray, - u_values ::StructArray, + u_values::StructArray, equations::CompressibleEulerEquations3D) - # The following is semantically equivalent to - # @threaded for i in eachindex(u_values) - # entropy_var_values[i] = cons2entropy(u_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. 
- @unpack gamma, inv_gamma_minus_one = equations - - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(u_values) - w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values, w5_values) - rho = rho_values[i] - rho_v1 = rho_v1_values[i] - rho_v2 = rho_v2_values[i] - rho_v3 = rho_v3_values[i] - rho_e = rho_e_values[i] - - # The following is basically the same code as in `cons2entropy` - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - rho_p = rho / p - - w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square - w2_values[i] = rho_p * v1 - w3_values[i] = rho_p * v2 - w4_values[i] = rho_p * v3 - w5_values[i] = -rho_p - end + # The following is semantically equivalent to + # @threaded for i in eachindex(u_values) + # entropy_var_values[i] = cons2entropy(u_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + + rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(u_values) + w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_v3_values, rho_e_values, + w1_values, w2_values, w3_values, w4_values, + w5_values) + rho = rho_values[i] + rho_v1 = rho_v1_values[i] + rho_v2 = rho_v2_values[i] + rho_v3 = rho_v3_values[i] + rho_e = rho_e_values[i] + + # The following is basically the same code as in `cons2entropy` + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) + rho_p = rho / p + + w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square + w2_values[i] = rho_p * v1 + w3_values[i] = rho_p * v2 + w4_values[i] = rho_p * v3 + w5_values[i] = -rho_p + end end -function entropy2cons!(entropy_projected_u_values ::StructArray, +function entropy2cons!(entropy_projected_u_values::StructArray, projected_entropy_var_values::StructArray, equations::CompressibleEulerEquations3D) - # The following is semantically equivalent to - # @threaded for i in eachindex(projected_entropy_var_values) - # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. 
- @unpack gamma, inv_gamma_minus_one = equations - gamma_minus_one = gamma - 1 - - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(entropy_projected_u_values) - w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(projected_entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values, w5_values) - - # The following is basically the same code as in `entropy2cons` - # Convert to entropy `-rho * s` used by - # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - # instead of `-rho * s / (gamma - 1)` - w1 = gamma_minus_one * w1_values[i] - w2 = gamma_minus_one * w2_values[i] - w3 = gamma_minus_one * w3_values[i] - w4 = gamma_minus_one * w4_values[i] - w5 = gamma_minus_one * w5_values[i] - - # s = specific entropy, eq. (53) - s = gamma - w1 + (w2^2 + w3^2 + w4^2) / (2 * w5) - - # eq. (52) - rho_iota = (gamma_minus_one / (-w5)^gamma)^(inv_gamma_minus_one) * exp(-s * inv_gamma_minus_one) - - # eq. (51) - rho_values[i] = -rho_iota * w5 - rho_v1_values[i] = rho_iota * w2 - rho_v2_values[i] = rho_iota * w3 - rho_v3_values[i] = rho_iota * w4 - rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2 + w4^2) / (2 * w5)) - end + # The following is semantically equivalent to + # @threaded for i in eachindex(projected_entropy_var_values) + # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + gamma_minus_one = gamma - 1 + + rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(entropy_projected_u_values) + w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(projected_entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_v3_values, rho_e_values, + w1_values, w2_values, w3_values, w4_values, + w5_values) + + # The following is basically the same code as in `entropy2cons` + # Convert to entropy `-rho * s` used by + # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + # instead of `-rho * s / (gamma - 1)` + w1 = gamma_minus_one * w1_values[i] + w2 = gamma_minus_one * w2_values[i] + w3 = gamma_minus_one * w3_values[i] + w4 = gamma_minus_one * w4_values[i] + w5 = gamma_minus_one * w5_values[i] + + # s = specific entropy, eq. (53) + s = gamma - w1 + (w2^2 + w3^2 + w4^2) / (2 * w5) + + # eq. (52) + rho_iota = (gamma_minus_one / (-w5)^gamma)^(inv_gamma_minus_one) * + exp(-s * inv_gamma_minus_one) + + # eq. 
(51) + rho_values[i] = -rho_iota * w5 + rho_v1_values[i] = rho_iota * w2 + rho_v2_values[i] = rho_iota * w3 + rho_v3_values[i] = rho_iota * w4 + rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2 + w4^2) / (2 * w5)) + end end - - - end # @muladd diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index 95a471fa71b..2c5505cc4e9 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -8,21 +8,21 @@ const GaussSBP = Polynomial{Gauss} function tensor_product_quadrature(element_type::Line, r1D, w1D) - return r1D, w1D + return r1D, w1D end function tensor_product_quadrature(element_type::Quad, r1D, w1D) - sq, rq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D)) - ws, wr = vec.(StartUpDG.NodesAndModes.meshgrid(w1D)) - wq = wr .* ws - return rq, sq, wq + sq, rq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D)) + ws, wr = vec.(StartUpDG.NodesAndModes.meshgrid(w1D)) + wq = wr .* ws + return rq, sq, wq end function tensor_product_quadrature(element_type::Hex, r1D, w1D) - rq, sq, tq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D, r1D, r1D)) - wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(w1D, w1D, w1D)) - wq = wr .* ws .* wt - return rq, sq, tq, wq + rq, sq, tq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D, r1D, r1D)) + wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(w1D, w1D, w1D)) + wq = wr .* ws .* wt + return rq, sq, tq, wq end # type parameters for `TensorProductFaceOperator`. @@ -32,7 +32,7 @@ struct Interpolation <: AbstractGaussOperator end # which is used in `VolumeIntegralFluxDifferencing`. # - `Projection{ScaleByFaceWeights=Static.True()}` corresponds to the quadrature-based lifting # operator `LIFT = M \ (Vf' * diagm(rd.wf))`, which is used in `SurfaceIntegralWeakForm` -struct Projection{ScaleByFaceWeights} <: AbstractGaussOperator end +struct Projection{ScaleByFaceWeights} <: AbstractGaussOperator end # used to dispatch for different Gauss interpolation operators abstract type AbstractTensorProductGaussOperator end @@ -41,87 +41,89 @@ abstract type AbstractTensorProductGaussOperator end # # Data for performing tensor product interpolation from volume nodes to face nodes. 
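Before the operator struct itself: the face interpolation in this file is built from one small 1D matrix that maps the 1D Gauss nodes to the endpoints ±1 and is applied along each line of volume nodes (sum factorization). A minimal sketch, with a generic Lagrange helper standing in for `polynomial_interpolation_matrix`:

using LinearAlgebra

# 1D interpolation matrix from nodes `r` to points `rout`, via the
# monomial Vandermonde (a generic stand-in, fine for small node counts)
function lagrange_interp_matrix(r, rout)
    V = [ri^j for ri in r, j in 0:(length(r) - 1)]
    Vout = [ri^j for ri in rout, j in 0:(length(r) - 1)]
    return Vout / V
end

r1D = [-sqrt(3 / 5), 0.0, sqrt(3 / 5)]     # three-point Gauss nodes
interp_to_faces = lagrange_interp_matrix(r1D, [-1.0, 1.0])

# On a Quad, face values along one coordinate are obtained by applying this
# 2 × 3 matrix to each "line" of volume nodes:
x = reshape(collect(1.0:9.0), 3, 3)        # hypothetical nodal values
left_right_traces = interp_to_faces * x    # traces on the two opposite faces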
struct TensorProductGaussFaceOperator{NDIMS, OperatorType <: AbstractGaussOperator, - Tmat, Tweights, Tfweights, Tindices} <: AbstractTensorProductGaussOperator - interp_matrix_gauss_to_face_1d::Tmat - inv_volume_weights_1d::Tweights - face_weights::Tfweights - face_indices_tensor_product::Tindices - nnodes_1d::Int - nfaces::Int + Tmat, Tweights, Tfweights, Tindices} <: + AbstractTensorProductGaussOperator + interp_matrix_gauss_to_face_1d::Tmat + inv_volume_weights_1d::Tweights + face_weights::Tfweights + face_indices_tensor_product::Tindices + nnodes_1d::Int + nfaces::Int end # constructor for a 2D operator function TensorProductGaussFaceOperator(operator::AbstractGaussOperator, dg::DGMulti{2, Quad, GaussSBP}) - rd = dg.basis - - rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) - interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) - - nnodes_1d = length(rq1D) - - # Permutation of indices in a tensor product form - num_faces = StartUpDG.num_faces(rd.element_type) - indices = reshape(1:length(rd.rf), nnodes_1d, num_faces) - face_indices_tensor_product = zeros(Int, 2, nnodes_1d, ndims(rd.element_type)) - for i in 1:nnodes_1d # loop over nodes in one face - face_indices_tensor_product[:, i, 1] .= indices[i, 1:2] - face_indices_tensor_product[:, i, 2] .= indices[i, 3:4] - end - - T_op = typeof(operator) - Tm = typeof(interp_matrix_gauss_to_face_1d) - Tw = typeof(inv.(wq1D)) - Tf = typeof(rd.wf) - Ti = typeof(face_indices_tensor_product) - return TensorProductGaussFaceOperator{2, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, - inv.(wq1D), rd.wf, - face_indices_tensor_product, - nnodes_1d, num_faces) + rd = dg.basis + + rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) + interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) + + nnodes_1d = length(rq1D) + + # Permutation of indices in a tensor product form + num_faces = StartUpDG.num_faces(rd.element_type) + indices = reshape(1:length(rd.rf), nnodes_1d, num_faces) + face_indices_tensor_product = zeros(Int, 2, nnodes_1d, ndims(rd.element_type)) + for i in 1:nnodes_1d # loop over nodes in one face + face_indices_tensor_product[:, i, 1] .= indices[i, 1:2] + face_indices_tensor_product[:, i, 2] .= indices[i, 3:4] + end + + T_op = typeof(operator) + Tm = typeof(interp_matrix_gauss_to_face_1d) + Tw = typeof(inv.(wq1D)) + Tf = typeof(rd.wf) + Ti = typeof(face_indices_tensor_product) + return TensorProductGaussFaceOperator{2, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, + inv.(wq1D), rd.wf, + face_indices_tensor_product, + nnodes_1d, num_faces) end # constructor for a 3D operator function TensorProductGaussFaceOperator(operator::AbstractGaussOperator, dg::DGMulti{3, Hex, GaussSBP}) - rd = dg.basis - - rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) - interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) - - nnodes_1d = length(rq1D) - - # Permutation of indices in a tensor product form - num_faces = StartUpDG.num_faces(rd.element_type) - indices = reshape(1:length(rd.rf), nnodes_1d, nnodes_1d, num_faces) - face_indices_tensor_product = zeros(Int, 2, nnodes_1d, nnodes_1d, ndims(rd.element_type)) - for j in 1:nnodes_1d, i in 1:nnodes_1d # loop over nodes in one face - face_indices_tensor_product[:, i, j, 1] .= indices[i, j, 1:2] - face_indices_tensor_product[:, i, j, 2] .= indices[i, j, 3:4] - face_indices_tensor_product[:, i, j, 3] .= indices[i, j, 5:6] - end - - T_op = typeof(operator) - Tm = typeof(interp_matrix_gauss_to_face_1d) - Tw = 
typeof(inv.(wq1D)) - Tf = typeof(rd.wf) - Ti = typeof(face_indices_tensor_product) - return TensorProductGaussFaceOperator{3, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, - inv.(wq1D), rd.wf, - face_indices_tensor_product, - nnodes_1d, num_faces) + rd = dg.basis + + rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) + interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) + + nnodes_1d = length(rq1D) + + # Permutation of indices in a tensor product form + num_faces = StartUpDG.num_faces(rd.element_type) + indices = reshape(1:length(rd.rf), nnodes_1d, nnodes_1d, num_faces) + face_indices_tensor_product = zeros(Int, 2, nnodes_1d, nnodes_1d, + ndims(rd.element_type)) + for j in 1:nnodes_1d, i in 1:nnodes_1d # loop over nodes in one face + face_indices_tensor_product[:, i, j, 1] .= indices[i, j, 1:2] + face_indices_tensor_product[:, i, j, 2] .= indices[i, j, 3:4] + face_indices_tensor_product[:, i, j, 3] .= indices[i, j, 5:6] + end + + T_op = typeof(operator) + Tm = typeof(interp_matrix_gauss_to_face_1d) + Tw = typeof(inv.(wq1D)) + Tf = typeof(rd.wf) + Ti = typeof(face_indices_tensor_product) + return TensorProductGaussFaceOperator{3, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, + inv.(wq1D), rd.wf, + face_indices_tensor_product, + nnodes_1d, num_faces) end # specialize behavior of `mul_by!(A)` where `A isa TensorProductGaussFaceOperator)` @inline function mul_by!(A::AbstractTensorProductGaussOperator) - return (out, x) -> tensor_product_gauss_face_operator!(out, A, x) + return (out, x) -> tensor_product_gauss_face_operator!(out, A, x) end @inline function tensor_product_gauss_face_operator!(out::AbstractMatrix, A::AbstractTensorProductGaussOperator, x::AbstractMatrix) - @threaded for col in Base.OneTo(size(out, 2)) - tensor_product_gauss_face_operator!(view(out, :, col), A, view(x, :, col)) - end + @threaded for col in Base.OneTo(size(out, 2)) + tensor_product_gauss_face_operator!(view(out, :, col), A, view(x, :, col)) + end end # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). @@ -129,276 +131,317 @@ end # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent +#! format: off # Interpolates values from volume Gauss nodes to face nodes on one element. @inline function tensor_product_gauss_face_operator!(out::AbstractVector, A::TensorProductGaussFaceOperator{2, Interpolation}, x_in::AbstractVector) - - (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A - (; nnodes_1d) = A - - fill!(out, zero(eltype(out))) - - # for 2D GaussSBP nodes, the indexing is first in x, then in y - x = reshape(x_in, nnodes_1d, nnodes_1d) - - # interpolation in the x-direction - @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, 1] - index_right = face_indices_tensor_product[2, i, 1] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i] +#! 
format: on + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; nnodes_1d) = A + + fill!(out, zero(eltype(out))) + + # for 2D GaussSBP nodes, the indexing is first in x, then in y + x = reshape(x_in, nnodes_1d, nnodes_1d) + + # interpolation in the x-direction + @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, 1] + index_right = face_indices_tensor_product[2, i, 1] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i] + end end - end - - # interpolation in the y-direction - @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, 2] - index_right = face_indices_tensor_product[2, i, 2] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj] + + # interpolation in the y-direction + @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, 2] + index_right = face_indices_tensor_product[2, i, 2] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj] + end end - end end # Interpolates values from volume Gauss nodes to face nodes on one element. +#! format: off @inline function tensor_product_gauss_face_operator!(out::AbstractVector, A::TensorProductGaussFaceOperator{3, Interpolation}, x::AbstractVector) - - (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A - (; nnodes_1d) = A - - fill!(out, zero(eltype(out))) - - # for 3D GaussSBP nodes, the indexing is first in y, then x, then z. - x = reshape(x, nnodes_1d, nnodes_1d, nnodes_1d) - - # interpolation in the y-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 2] - index_right = face_indices_tensor_product[2, i, j, 2] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i, j] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i, j] +#! format: on + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; nnodes_1d) = A + + fill!(out, zero(eltype(out))) + + # for 3D GaussSBP nodes, the indexing is first in y, then x, then z. 
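+    # (that is, after the `reshape` below, the first array index runs over the
+    # y-coordinate, the second over x, and the third over z)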
+ x = reshape(x, nnodes_1d, nnodes_1d, nnodes_1d) + + # interpolation in the y-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 2] + index_right = face_indices_tensor_product[2, i, j, 2] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i, j] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i, j] + end end - end - - # interpolation in the x-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 1] - index_right = face_indices_tensor_product[2, i, j, 1] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj, j] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj, j] + + # interpolation in the x-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 1] + index_right = face_indices_tensor_product[2, i, j, 1] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj, j] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj, j] + end end - end - - # interpolation in the z-direction - @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 3] - index_right = face_indices_tensor_product[2, i, j, 3] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - # The ordering (i,j) -> (j,i) needs to be reversed for this last face. - # This is due to way we define face nodes for Hex() types in StartUpDG.jl. - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[j, i, jj] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[j, i, jj] + + # interpolation in the z-direction + @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 3] + index_right = face_indices_tensor_product[2, i, j, 3] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + # The ordering (i,j) -> (j,i) needs to be reversed for this last face. + # This is due to way we define face nodes for Hex() types in StartUpDG.jl. + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[j, i, jj] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[j, i, jj] + end end - end end # Projects face node values to volume Gauss nodes on one element. +#! format: off @inline function tensor_product_gauss_face_operator!(out_vec::AbstractVector, A::TensorProductGaussFaceOperator{2, Projection{ApplyFaceWeights}}, x::AbstractVector) where {ApplyFaceWeights} - - (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A - (; inv_volume_weights_1d, nnodes_1d) = A - - fill!(out_vec, zero(eltype(out_vec))) - - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. 
- # `reshape` is fine if you are only accessing values. - # Note that, for 2D GaussSBP nodes, the indexing is first in x, then y - out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d), ()) - - if ApplyFaceWeights == true - @turbo for i in eachindex(x) - x[i] = x[i] * A.face_weights[i] +#! format: on + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; inv_volume_weights_1d, nnodes_1d) = A + + fill!(out_vec, zero(eltype(out_vec))) + + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Note that, for 2D GaussSBP nodes, the indexing is first in x, then y + out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d), ()) + + if ApplyFaceWeights == true + @turbo for i in eachindex(x) + x[i] = x[i] * A.face_weights[i] + end end - end - - # interpolation in the x-direction - @turbo for i in Base.OneTo(nnodes_1d) # loop over face nodes - index_left = face_indices_tensor_product[1, i, 1] - index_right = face_indices_tensor_product[2, i, 1] - for jj in Base.OneTo(nnodes_1d) # loop over a line of volume nodes - out[jj, i] = out[jj, i] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[jj, i] = out[jj, i] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the x-direction + @turbo for i in Base.OneTo(nnodes_1d) # loop over face nodes + index_left = face_indices_tensor_product[1, i, 1] + index_right = face_indices_tensor_product[2, i, 1] + for jj in Base.OneTo(nnodes_1d) # loop over a line of volume nodes + out[jj, i] = out[jj, i] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[jj, i] = out[jj, i] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - - # interpolation in the y-direction - @turbo for i in Base.OneTo(nnodes_1d) - index_left = face_indices_tensor_product[1, i, 2] - index_right = face_indices_tensor_product[2, i, 2] - # loop over a line of volume nodes - for jj in Base.OneTo(nnodes_1d) - out[i, jj] = out[i, jj] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[i, jj] = out[i, jj] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the y-direction + @turbo for i in Base.OneTo(nnodes_1d) + index_left = face_indices_tensor_product[1, i, 2] + index_right = face_indices_tensor_product[2, i, 2] + # loop over a line of volume nodes + for jj in Base.OneTo(nnodes_1d) + out[i, jj] = out[i, jj] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[i, jj] = out[i, jj] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - # apply inv(M) - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) - out[i, j] = out[i, j] * inv_volume_weights_1d[i] * inv_volume_weights_1d[j] - end + # apply inv(M) + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) + out[i, j] = out[i, j] * inv_volume_weights_1d[i] * inv_volume_weights_1d[j] + end end # Interpolates values from volume Gauss nodes to face nodes on one element. +#! 
format: off @inline function tensor_product_gauss_face_operator!(out_vec::AbstractVector, A::TensorProductGaussFaceOperator{3, Projection{ApplyFaceWeights}}, x::AbstractVector) where {ApplyFaceWeights} - - @unpack interp_matrix_gauss_to_face_1d, face_indices_tensor_product = A - @unpack inv_volume_weights_1d, nnodes_1d, nfaces = A - - fill!(out_vec, zero(eltype(out_vec))) - - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - # Note that, for 3D GaussSBP nodes, the indexing is first in y, then x, then z. - out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d, nnodes_1d), ()) - - if ApplyFaceWeights == true - @turbo for i in eachindex(x) - x[i] = x[i] * A.face_weights[i] +#! format: on + @unpack interp_matrix_gauss_to_face_1d, face_indices_tensor_product = A + @unpack inv_volume_weights_1d, nnodes_1d, nfaces = A + + fill!(out_vec, zero(eltype(out_vec))) + + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Note that, for 3D GaussSBP nodes, the indexing is first in y, then x, then z. + out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d, nnodes_1d), ()) + + if ApplyFaceWeights == true + @turbo for i in eachindex(x) + x[i] = x[i] * A.face_weights[i] + end end - end - - # interpolation in the y-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 2] - index_right = face_indices_tensor_product[2, i, j, 2] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[jj, i, j] = out[jj, i, j] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[jj, i, j] = out[jj, i, j] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the y-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 2] + index_right = face_indices_tensor_product[2, i, j, 2] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[jj, i, j] = out[jj, i, j] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[jj, i, j] = out[jj, i, j] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - - # interpolation in the x-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 1] - index_right = face_indices_tensor_product[2, i, j, 1] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[i, jj, j] = out[i, jj, j] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[i, jj, j] = out[i, jj, j] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the x-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 1] + index_right = face_indices_tensor_product[2, i, j, 1] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[i, jj, j] = out[i, jj, j] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[i, jj, j] = out[i, jj, j] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - 
end - - # interpolation in the z-direction - @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 3] - index_right = face_indices_tensor_product[2, i, j, 3] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - # The ordering (i,j) -> (j,i) needs to be reversed for this last face. - # This is due to way we define face nodes for Hex() types in StartUpDG.jl. - out[j, i, jj] = out[j, i, jj] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[j, i, jj] = out[j, i, jj] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the z-direction + @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 3] + index_right = face_indices_tensor_product[2, i, j, 3] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + # The ordering (i,j) -> (j,i) needs to be reversed for this last face. + # This is due to way we define face nodes for Hex() types in StartUpDG.jl. + out[j, i, jj] = out[j, i, jj] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[j, i, jj] = out[j, i, jj] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - # apply inv(M) - @turbo for k in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) - out[i, j, k] = out[i, j, k] * inv_volume_weights_1d[i] * inv_volume_weights_1d[j] * inv_volume_weights_1d[k] - end + # apply inv(M) + @turbo for k in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d), + i in Base.OneTo(nnodes_1d) + + out[i, j, k] = out[i, j, k] * inv_volume_weights_1d[i] * + inv_volume_weights_1d[j] * inv_volume_weights_1d[k] + end end # For now, this is mostly the same as `create_cache` for DGMultiFluxDiff{<:Polynomial}. # In the future, we may modify it so that we can specialize additional parts of GaussSBP() solvers. function create_cache(mesh::DGMultiMesh, equations, - dg::DGMultiFluxDiff{<:GaussSBP, <:Union{Quad, Hex}}, RealT, uEltype) - - # call general Polynomial flux differencing constructor - cache = invoke(create_cache, Tuple{typeof(mesh), typeof(equations), - DGMultiFluxDiff, typeof(RealT), typeof(uEltype)}, - mesh, equations, dg, RealT, uEltype) - - rd = dg.basis - @unpack md = mesh - - # for change of basis prior to the volume integral and entropy projection - r1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, polydeg(dg)) - rq1D, _ = StartUpDG.gauss_quad(0, 0, polydeg(dg)) - interp_matrix_lobatto_to_gauss_1D = polynomial_interpolation_matrix(r1D, rq1D) - interp_matrix_gauss_to_lobatto_1D = polynomial_interpolation_matrix(rq1D, r1D) - NDIMS = ndims(rd.element_type) - interp_matrix_lobatto_to_gauss = SimpleKronecker(NDIMS, interp_matrix_lobatto_to_gauss_1D, uEltype) - interp_matrix_gauss_to_lobatto = SimpleKronecker(NDIMS, interp_matrix_gauss_to_lobatto_1D, uEltype) - inv_gauss_weights = inv.(rd.wq) - - # specialized operators to perform tensor product interpolation to faces for Gauss nodes - interp_matrix_gauss_to_face = TensorProductGaussFaceOperator(Interpolation(), dg) - projection_matrix_gauss_to_face = TensorProductGaussFaceOperator(Projection{Static.False()}(), dg) - - # `LIFT` matrix for Gauss nodes - this is equivalent to `projection_matrix_gauss_to_face` scaled by `diagm(rd.wf)`, - # where `rd.wf` are Gauss node face quadrature weights. 
- gauss_LIFT = TensorProductGaussFaceOperator(Projection{Static.True()}(), dg) - - nvars = nvariables(equations) - rhs_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] - gauss_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] - - return (; cache..., projection_matrix_gauss_to_face, gauss_LIFT, inv_gauss_weights, - rhs_volume_local_threaded, gauss_volume_local_threaded, - interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_lobatto, - interp_matrix_gauss_to_face, - create_cache(mesh, equations, dg.volume_integral, dg, RealT, uEltype)...) # add cache specialized on the volume integral + dg::DGMultiFluxDiff{<:GaussSBP, <:Union{Quad, Hex}}, RealT, + uEltype) + + # call general Polynomial flux differencing constructor + cache = invoke(create_cache, + Tuple{typeof(mesh), typeof(equations), + DGMultiFluxDiff, typeof(RealT), typeof(uEltype)}, + mesh, equations, dg, RealT, uEltype) + + rd = dg.basis + @unpack md = mesh + + # for change of basis prior to the volume integral and entropy projection + r1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, polydeg(dg)) + rq1D, _ = StartUpDG.gauss_quad(0, 0, polydeg(dg)) + interp_matrix_lobatto_to_gauss_1D = polynomial_interpolation_matrix(r1D, rq1D) + interp_matrix_gauss_to_lobatto_1D = polynomial_interpolation_matrix(rq1D, r1D) + NDIMS = ndims(rd.element_type) + interp_matrix_lobatto_to_gauss = SimpleKronecker(NDIMS, + interp_matrix_lobatto_to_gauss_1D, + uEltype) + interp_matrix_gauss_to_lobatto = SimpleKronecker(NDIMS, + interp_matrix_gauss_to_lobatto_1D, + uEltype) + inv_gauss_weights = inv.(rd.wq) + + # specialized operators to perform tensor product interpolation to faces for Gauss nodes + interp_matrix_gauss_to_face = TensorProductGaussFaceOperator(Interpolation(), dg) + projection_matrix_gauss_to_face = TensorProductGaussFaceOperator(Projection{ + Static.False() + }(), dg) + + # `LIFT` matrix for Gauss nodes - this is equivalent to `projection_matrix_gauss_to_face` scaled by `diagm(rd.wf)`, + # where `rd.wf` are Gauss node face quadrature weights. + gauss_LIFT = TensorProductGaussFaceOperator(Projection{Static.True()}(), dg) + + nvars = nvariables(equations) + rhs_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] + gauss_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] + + return (; cache..., projection_matrix_gauss_to_face, gauss_LIFT, inv_gauss_weights, + rhs_volume_local_threaded, gauss_volume_local_threaded, + interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_lobatto, + interp_matrix_gauss_to_face, + create_cache(mesh, equations, dg.volume_integral, dg, RealT, uEltype)...) # add cache specialized on the volume integral end # by default, return an empty tuple for volume integral caches create_cache(mesh, equations, volume_integral, dg, RealT, uEltype) = NamedTuple() # TODO: DGMulti. Address hard-coding of `entropy2cons!` and `cons2entropy!` for this function. 
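The `entropy_projection!` reformatted below evaluates the entropy variables at the volume Gauss nodes, interpolates them to the face nodes, and converts back to conservative variables there. The pointwise maps it relies on are Trixi.jl's `cons2entropy`/`entropy2cons` pair; a small self-contained round-trip check (the state vector is an arbitrary admissible example, not taken from this PR):

```julia
using Trixi: CompressibleEulerEquations2D, cons2entropy, entropy2cons
using StaticArrays: SVector

equations = CompressibleEulerEquations2D(1.4)
u = SVector(1.0, 0.1, -0.2, 10.0)  # (rho, rho*v1, rho*v2, rho*e), admissible state

w = cons2entropy(u, equations)   # conservative -> entropy variables
u2 = entropy2cons(w, equations)  # inverse map, applied to the face values below
@assert isapprox(u2, u, rtol = 1e-12)
```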
-function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, dg::DGMultiFluxDiff{<:GaussSBP}) - - rd = dg.basis - @unpack Vq = rd - @unpack VhP, entropy_var_values, u_values = cache - @unpack projected_entropy_var_values, entropy_projected_u_values = cache - @unpack interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_face = cache - - @threaded for e in eachelement(mesh, dg, cache) - apply_to_each_field(mul_by!(interp_matrix_lobatto_to_gauss), view(u_values, :, e), view(u, :, e)) - end +function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, + dg::DGMultiFluxDiff{<:GaussSBP}) + rd = dg.basis + @unpack Vq = rd + @unpack VhP, entropy_var_values, u_values = cache + @unpack projected_entropy_var_values, entropy_projected_u_values = cache + @unpack interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_face = cache + + @threaded for e in eachelement(mesh, dg, cache) + apply_to_each_field(mul_by!(interp_matrix_lobatto_to_gauss), + view(u_values, :, e), view(u, :, e)) + end - # transform quadrature values to entropy variables - cons2entropy!(entropy_var_values, u_values, equations) + # transform quadrature values to entropy variables + cons2entropy!(entropy_var_values, u_values, equations) - volume_indices = Base.OneTo(rd.Nq) - face_indices = (rd.Nq + 1):(rd.Nq + rd.Nfq) + volume_indices = Base.OneTo(rd.Nq) + face_indices = (rd.Nq + 1):(rd.Nq + rd.Nfq) - # Interpolate volume Gauss nodes to Gauss face nodes (note the layout of - # `projected_entropy_var_values = [vol pts; face pts]`). - entropy_var_face_values = view(projected_entropy_var_values, face_indices, :) - apply_to_each_field(mul_by!(interp_matrix_gauss_to_face), entropy_var_face_values, entropy_var_values) + # Interpolate volume Gauss nodes to Gauss face nodes (note the layout of + # `projected_entropy_var_values = [vol pts; face pts]`). + entropy_var_face_values = view(projected_entropy_var_values, face_indices, :) + apply_to_each_field(mul_by!(interp_matrix_gauss_to_face), entropy_var_face_values, + entropy_var_values) - # directly copy over volume values (no entropy projection required) - entropy_projected_volume_values = view(entropy_projected_u_values, volume_indices, :) - @threaded for i in eachindex(u_values) - entropy_projected_volume_values[i] = u_values[i] - end + # directly copy over volume values (no entropy projection required) + entropy_projected_volume_values = view(entropy_projected_u_values, volume_indices, + :) + @threaded for i in eachindex(u_values) + entropy_projected_volume_values[i] = u_values[i] + end - # transform entropy to conservative variables on face values - entropy_projected_face_values = view(entropy_projected_u_values, face_indices, :) - entropy2cons!(entropy_projected_face_values, entropy_var_face_values, equations) + # transform entropy to conservative variables on face values + entropy_projected_face_values = view(entropy_projected_u_values, face_indices, :) + entropy2cons!(entropy_projected_face_values, entropy_var_face_values, equations) - return nothing + return nothing end # Assumes cache.flux_face_values is already computed. 
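The `calc_surface_integral!` hunk below applies the `gauss_LIFT` operator to these precomputed face fluxes, element by element. As noted earlier in this file, `gauss_LIFT` realizes the quadrature-based lifting matrix `LIFT = M \ (Vf' * diagm(wf))`; a 1D sketch of that construction (variable names are illustrative, only the formula is taken from this PR):

```julia
using LinearAlgebra
using StartUpDG

N = 3
rq, wq = StartUpDG.gauss_quad(0, 0, N)  # 1D volume quadrature, diagonal mass matrix
Vf = StartUpDG.polynomial_interpolation_matrix(rq, [-1.0; 1.0])  # volume -> faces
wf = [1.0, 1.0]  # face "quadrature" weights of a 1D element

LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf))  # size (N + 1) x 2

flux_face = [0.3, -0.7]        # numerical fluxes at the two face nodes
du_surface = LIFT * flux_face  # surface contribution at the volume nodes
```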
@@ -406,140 +449,146 @@ end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMultiFluxDiff{<:GaussSBP}, cache) + (; gauss_LIFT, gauss_volume_local_threaded) = cache - (; gauss_LIFT, gauss_volume_local_threaded) = cache + @threaded for e in eachelement(mesh, dg, cache) - @threaded for e in eachelement(mesh, dg, cache) + # applies LIFT matrix, output is stored at Gauss nodes + gauss_volume_local = gauss_volume_local_threaded[Threads.threadid()] + apply_to_each_field(mul_by!(gauss_LIFT), gauss_volume_local, + view(cache.flux_face_values, :, e)) - # applies LIFT matrix, output is stored at Gauss nodes - gauss_volume_local = gauss_volume_local_threaded[Threads.threadid()] - apply_to_each_field(mul_by!(gauss_LIFT), gauss_volume_local, view(cache.flux_face_values, :, e)) - - for i in eachindex(gauss_volume_local) - du[i, e] = du[i, e] + gauss_volume_local[i] + for i in eachindex(gauss_volume_local) + du[i, e] = du[i, e] + gauss_volume_local[i] + end end - - end end @inline function flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_flux, dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha=true) - - fluxdiff_local = cache.fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(cache.entropy_projected_u_values, :, element) - - local_flux_differencing!(fluxdiff_local, u_local, element, - have_nonconservative_terms, - volume_flux, has_sparse_operators(dg), - mesh, equations, dg, cache) - - # convert `fluxdiff_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` - # for faster performance when using `apply_to_each_field`. - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - for i in Base.OneTo(length(fluxdiff_local)) - rhs_local[i] = fluxdiff_local[i] - end - - project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + cache, alpha = true) + fluxdiff_local = cache.fluxdiff_local_threaded[Threads.threadid()] + fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + u_local = view(cache.entropy_projected_u_values, :, element) + + local_flux_differencing!(fluxdiff_local, u_local, element, + have_nonconservative_terms, + volume_flux, has_sparse_operators(dg), + mesh, equations, dg, cache) + + # convert `fluxdiff_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` + # for faster performance when using `apply_to_each_field`. + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + for i in Base.OneTo(length(fluxdiff_local)) + rhs_local[i] = fluxdiff_local[i] + end + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) end function project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh::DGMultiMesh, - dg::DGMulti, cache, alpha=true) - - # Here, we exploit that under a Gauss nodal basis the structure of the projection - # matrix `Ph = [diagm(1 ./ wq), projection_matrix_gauss_to_face]` such that - # `Ph * [u; uf] = (u ./ wq) + projection_matrix_gauss_to_face * uf`. 
- volume_indices = Base.OneTo(dg.basis.Nq) - face_indices = (dg.basis.Nq + 1):(dg.basis.Nq + dg.basis.Nfq) - local_volume_flux = view(rhs_local, volume_indices) - local_face_flux = view(rhs_local, face_indices) - - # initialize rhs_volume_local = projection_matrix_gauss_to_face * local_face_flux - rhs_volume_local = cache.rhs_volume_local_threaded[Threads.threadid()] - apply_to_each_field(mul_by!(cache.projection_matrix_gauss_to_face), rhs_volume_local, local_face_flux) - - # accumulate volume contributions at Gauss nodes - for i in eachindex(rhs_volume_local) - du_local = rhs_volume_local[i] + local_volume_flux[i] * cache.inv_gauss_weights[i] - du[i, element] = du[i, element] + alpha * du_local - end + dg::DGMulti, cache, alpha = true) + + # Here, we exploit that under a Gauss nodal basis the structure of the projection + # matrix `Ph = [diagm(1 ./ wq), projection_matrix_gauss_to_face]` such that + # `Ph * [u; uf] = (u ./ wq) + projection_matrix_gauss_to_face * uf`. + volume_indices = Base.OneTo(dg.basis.Nq) + face_indices = (dg.basis.Nq + 1):(dg.basis.Nq + dg.basis.Nfq) + local_volume_flux = view(rhs_local, volume_indices) + local_face_flux = view(rhs_local, face_indices) + + # initialize rhs_volume_local = projection_matrix_gauss_to_face * local_face_flux + rhs_volume_local = cache.rhs_volume_local_threaded[Threads.threadid()] + apply_to_each_field(mul_by!(cache.projection_matrix_gauss_to_face), + rhs_volume_local, local_face_flux) + + # accumulate volume contributions at Gauss nodes + for i in eachindex(rhs_volume_local) + du_local = rhs_volume_local[i] + + local_volume_flux[i] * cache.inv_gauss_weights[i] + du[i, element] = du[i, element] + alpha * du_local + end end function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGMultiFluxDiff{<:GaussSBP}, cache) - - @threaded for e in eachelement(mesh, dg, cache) - flux_differencing_kernel!(du, u, e, mesh, - have_nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end - + @threaded for e in eachelement(mesh, dg, cache) + flux_differencing_kernel!(du, u, e, mesh, + have_nonconservative_terms, equations, + volume_integral.volume_flux, dg, cache) + end end # interpolate back to Lobatto nodes after applying the inverse Jacobian at Gauss points function invert_jacobian_and_interpolate!(du, mesh::DGMultiMesh, equations, - dg::DGMultiFluxDiff{<:GaussSBP}, cache; scaling=-1) - - (; interp_matrix_gauss_to_lobatto, rhs_volume_local_threaded, invJ) = cache - - @threaded for e in eachelement(mesh, dg, cache) - rhs_volume_local = rhs_volume_local_threaded[Threads.threadid()] - - # At this point, `rhs_volume_local` should still be stored at Gauss points. - # We scale it by the inverse Jacobian before transforming back to Lobatto. - for i in eachindex(rhs_volume_local) - rhs_volume_local[i] = du[i, e] * invJ[i, e] * scaling + dg::DGMultiFluxDiff{<:GaussSBP}, cache; + scaling = -1) + (; interp_matrix_gauss_to_lobatto, rhs_volume_local_threaded, invJ) = cache + + @threaded for e in eachelement(mesh, dg, cache) + rhs_volume_local = rhs_volume_local_threaded[Threads.threadid()] + + # At this point, `rhs_volume_local` should still be stored at Gauss points. + # We scale it by the inverse Jacobian before transforming back to Lobatto. 
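+        # (with the default `scaling = -1`, the sign flip that `invert_jacobian!`
+        # normally performs is applied here as well)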
+ for i in eachindex(rhs_volume_local) + rhs_volume_local[i] = du[i, e] * invJ[i, e] * scaling + end + + # Interpolate result back to Lobatto nodes for ease of analysis, visualization + apply_to_each_field(mul_by!(interp_matrix_gauss_to_lobatto), + view(du, :, e), rhs_volume_local) end - - # Interpolate result back to Lobatto nodes for ease of analysis, visualization - apply_to_each_field(mul_by!(interp_matrix_gauss_to_lobatto), - view(du, :, e), rhs_volume_local) - end - end # Specialize RHS so that we can call `invert_jacobian_and_interpolate!` instead of just `invert_jacobian!`, # since `invert_jacobian!` is also used in other places (e.g., parabolic terms). function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, - source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, cache) where {Source, BC} - - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # this function evaluates the solution at volume and face quadrature points (which was previously - # done in `prolong2interfaces` and `calc_volume_integral`) - @trixi_timeit timer() "entropy_projection!" entropy_projection!(cache, u, mesh, equations, dg) + source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, + cache) where {Source, BC} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # this function evaluates the solution at volume and face quadrature points (which was previously + # done in `prolong2interfaces` and `calc_volume_integral`) + @trixi_timeit timer() "entropy_projection!" begin + entropy_projection!(cache, u, mesh, equations, dg) + end - # `du` is stored at Gauss nodes here - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + # `du` is stored at Gauss nodes here + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl - @trixi_timeit timer() "interface flux" calc_interface_flux!(cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), - equations, dg) + # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, dg.surface_integral, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) + end - # `du` is stored at Gauss nodes here - @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, equations, - dg.surface_integral, dg, cache) + # `du` is stored at Gauss nodes here + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - # invert Jacobian and map `du` from Gauss to Lobatto nodes - @trixi_timeit timer() "Jacobian" invert_jacobian_and_interpolate!(du, mesh, equations, dg, cache) + # invert Jacobian and map `du` from Gauss to Lobatto nodes + @trixi_timeit timer() "Jacobian" begin + invert_jacobian_and_interpolate!(du, mesh, equations, dg, cache) + end - @trixi_timeit timer() "source 
terms" calc_sources!(du, u, t, source_terms, - mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, mesh, equations, dg, cache) + end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgmulti/sbp.jl b/src/solvers/dgmulti/sbp.jl index 18be52b7ba3..ba02d812041 100644 --- a/src/solvers/dgmulti/sbp.jl +++ b/src/solvers/dgmulti/sbp.jl @@ -18,13 +18,13 @@ and """ function DGMulti(approximation_type::AbstractDerivativeOperator; element_type::AbstractElemShape, - surface_flux=flux_central, - surface_integral=SurfaceIntegralWeakForm(surface_flux), - volume_integral=VolumeIntegralWeakForm(), + surface_flux = flux_central, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralWeakForm(), kwargs...) - - rd = RefElemData(element_type, approximation_type; kwargs...) - return DG(rd, nothing #= mortar =#, surface_integral, volume_integral) + rd = RefElemData(element_type, approximation_type; kwargs...) + # `nothing` is passed as `mortar` + return DG(rd, nothing, surface_integral, volume_integral) end function DGMulti(element_type::AbstractElemShape, @@ -32,307 +32,340 @@ function DGMulti(element_type::AbstractElemShape, volume_integral, surface_integral; kwargs...) - - DGMulti(approximation_type, element_type=element_type, - surface_integral=surface_integral, volume_integral=volume_integral) + DGMulti(approximation_type, element_type = element_type, + surface_integral = surface_integral, volume_integral = volume_integral) end +function construct_1d_operators(D::AbstractDerivativeOperator, tol) + nodes_1d = collect(grid(D)) + M = SummationByPartsOperators.mass_matrix(D) + if M isa UniformScaling + weights_1d = M * ones(Bool, length(nodes_1d)) + else + weights_1d = diag(M) + end + + # StartUpDG assumes nodes from -1 to +1. Thus, we need to re-scale everything. + # We can adjust the grid spacing as follows. + xmin = SummationByPartsOperators.xmin(D) + xmax = SummationByPartsOperators.xmax(D) + factor = 2 / (xmax - xmin) + @. nodes_1d = factor * (nodes_1d - xmin) - 1 + @. weights_1d = factor * weights_1d + D_1d = droptol!(inv(factor) * sparse(D), tol) + I_1d = Diagonal(ones(Bool, length(nodes_1d))) -function construct_1d_operators(D::AbstractDerivativeOperator, tol) - nodes_1d = collect(grid(D)) - M = SummationByPartsOperators.mass_matrix(D) - if M isa UniformScaling - weights_1d = M * ones(Bool, length(nodes_1d)) - else - weights_1d = diag(M) - end - - # StartUpDG assumes nodes from -1 to +1. Thus, we need to re-scale everything. - # We can adjust the grid spacing as follows. - xmin = SummationByPartsOperators.xmin(D) - xmax = SummationByPartsOperators.xmax(D) - factor = 2 / (xmax - xmin) - @. nodes_1d = factor * (nodes_1d - xmin) - 1 - @. 
weights_1d = factor * weights_1d - - D_1d = droptol!(inv(factor) * sparse(D), tol) - I_1d = Diagonal(ones(Bool, length(nodes_1d))) - - return nodes_1d, weights_1d, D_1d, I_1d + return nodes_1d, weights_1d, D_1d, I_1d end - function StartUpDG.RefElemData(element_type::Line, D::AbstractDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d = construct_1d_operators(D, tol) - - # volume - rq = r = nodes_1d - wq = weights_1d - Dr = D_1d - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = (r,) - rstq = (rq,) - Drst = (Dr,) - - # face - face_vertices = StartUpDG.face_vertices(element_type) - face_mask = [1, length(nodes_1d)] - - rf = [-1.0; 1.0] - nrJ = [-1.0; 1.0] - wf = [1.0; 1.0] - if D isa AbstractPeriodicDerivativeOperator - # we do not need any face stuff for periodic operators - Vf = spzeros(length(wf), length(wq)) - else - Vf = sparse([1, 2], [1, length(nodes_1d)], [1.0, 1.0]) - end - LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) - - rstf = (rf,) - nrstJ = (nrJ,) - - # low order interpolation nodes - r1 = StartUpDG.nodes(element_type, 1) - V1 = StartUpDG.vandermonde(element_type, 1, r) / StartUpDG.vandermonde(element_type, 1, r1) - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d = construct_1d_operators(D, tol) + + # volume + rq = r = nodes_1d + wq = weights_1d + Dr = D_1d + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r,) + rstq = (rq,) + Drst = (Dr,) + + # face + face_vertices = StartUpDG.face_vertices(element_type) + face_mask = [1, length(nodes_1d)] + + rf = [-1.0; 1.0] + nrJ = [-1.0; 1.0] + wf = [1.0; 1.0] + if D isa AbstractPeriodicDerivativeOperator + # we do not need any face stuff for periodic operators + Vf = spzeros(length(wf), length(wq)) + else + Vf = sparse([1, 2], [1, length(nodes_1d)], [1.0, 1.0]) + end + LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) + + rstf = (rf,) + nrstJ = (nrJ,) + + # low order interpolation nodes + r1 = StartUpDG.nodes(element_type, 1) + V1 = StartUpDG.vandermonde(element_type, 1, r) / + StartUpDG.vandermonde(element_type, 1, r1) + + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end - function StartUpDG.RefElemData(element_type::Quad, D::AbstractDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) - - # volume - s, r = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d)) # this is to match - # ordering of nrstJ - rq = r; sq = s - wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d)) - wq = wr .* ws - Dr = kron(I_1d, D_1d) - Ds = kron(D_1d, I_1d) - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = 
(r, s) - rstq = (rq, sq) - Drst = (Dr, Ds) - - # face - face_vertices = StartUpDG.face_vertices(element_type) - face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s)...) - - rf, sf, wf, nrJ, nsJ = StartUpDG.init_face_data(element_type, - quad_rule_face=(nodes_1d, weights_1d)) - if D isa AbstractPeriodicDerivativeOperator - # we do not need any face stuff for periodic operators - Vf = spzeros(length(wf), length(wq)) - else - Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) - end - LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) - - rstf = (rf, sf) - nrstJ = (nrJ, nsJ) - - # low order interpolation nodes - r1, s1 = StartUpDG.nodes(element_type, 1) - V1 = StartUpDG.vandermonde(element_type, 1, r, s) / StartUpDG.vandermonde(element_type, 1, r1, s1) - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) + + # volume + s, r = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d)) # this is to match + # ordering of nrstJ + rq = r + sq = s + wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d)) + wq = wr .* ws + Dr = kron(I_1d, D_1d) + Ds = kron(D_1d, I_1d) + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r, s) + rstq = (rq, sq) + Drst = (Dr, Ds) + + # face + face_vertices = StartUpDG.face_vertices(element_type) + face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s)...) 
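+    # `face_mask` lists the volume nodes lying on the element boundary; for
+    # non-periodic operators, `Vf` below just selects these nodes (no interpolation)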
+ + rf, sf, wf, nrJ, nsJ = StartUpDG.init_face_data(element_type, + quad_rule_face = (nodes_1d, weights_1d)) + if D isa AbstractPeriodicDerivativeOperator + # we do not need any face stuff for periodic operators + Vf = spzeros(length(wf), length(wq)) + else + Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) + end + LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) + + rstf = (rf, sf) + nrstJ = (nrJ, nsJ) + + # low order interpolation nodes + r1, s1 = StartUpDG.nodes(element_type, 1) + V1 = StartUpDG.vandermonde(element_type, 1, r, s) / + StartUpDG.vandermonde(element_type, 1, r1, s1) + + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end - function StartUpDG.RefElemData(element_type::Hex, D::AbstractDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) - - # volume - # to match ordering of nrstJ - s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) - rq = r; sq = s; tq = t - wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) - wq = wr .* ws .* wt - Dr = kron(I_1d, I_1d, D_1d) - Ds = kron(I_1d, D_1d, I_1d) - Dt = kron(D_1d, I_1d, I_1d) - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = (r, s, t) - rstq = (rq, sq, tq) - Drst = (Dr, Ds, Dt) - - # face - face_vertices = StartUpDG.face_vertices(element_type) - face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s, t)...) 
- - rf, sf, tf, wf, nrJ, nsJ, ntJ = let - rf, sf = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d)) - wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d)) - wf = wr .* ws - StartUpDG.init_face_data(element_type, quad_rule_face=(rf, sf, wf)) - end - Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) - LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) - - rstf = (rf, sf, tf) - nrstJ = (nrJ, nsJ, ntJ) - - # low order interpolation nodes - r1, s1, t1 = StartUpDG.nodes(element_type, 1) - V1 = StartUpDG.vandermonde(element_type, 1, r, s, t) / StartUpDG.vandermonde(element_type, 1, r1, s1, t1) - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) + + # volume + # to match ordering of nrstJ + s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) + rq = r + sq = s + tq = t + wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) + wq = wr .* ws .* wt + Dr = kron(I_1d, I_1d, D_1d) + Ds = kron(I_1d, D_1d, I_1d) + Dt = kron(D_1d, I_1d, I_1d) + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r, s, t) + rstq = (rq, sq, tq) + Drst = (Dr, Ds, Dt) + + # face + face_vertices = StartUpDG.face_vertices(element_type) + face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s, t)...) + + rf, sf, tf, wf, nrJ, nsJ, ntJ = let + rf, sf = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d)) + wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d)) + wf = wr .* ws + StartUpDG.init_face_data(element_type, quad_rule_face = (rf, sf, wf)) + end + Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) + LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) + + rstf = (rf, sf, tf) + nrstJ = (nrJ, nsJ, ntJ) + + # low order interpolation nodes + r1, s1, t1 = StartUpDG.nodes(element_type, 1) + V1 = StartUpDG.vandermonde(element_type, 1, r, s, t) / + StartUpDG.vandermonde(element_type, 1, r1, s1, t1) + + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end # specialized Hex constructor in 3D to reduce memory usage. 
function StartUpDG.RefElemData(element_type::Hex, D::AbstractPeriodicDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) - - # volume - # to match ordering of nrstJ - s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) - rq = r; sq = s; tq = t - wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) - wq = wr .* ws .* wt - Dr = kron(I_1d, I_1d, D_1d) - Ds = kron(I_1d, D_1d, I_1d) - Dt = kron(D_1d, I_1d, I_1d) - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = (r, s, t) - rstq = (rq, sq, tq) - Drst = (Dr, Ds, Dt) - - # face - # We do not need any face data for periodic operators. Thus, we just - # pass `nothing` to save memory. - face_vertices = ntuple(_ -> nothing, 3) - face_mask = nothing - wf = nothing - rstf = ntuple(_ -> nothing, 3) - nrstJ = ntuple(_ -> nothing, 3) - Vf = nothing - LIFT = nothing - - # low order interpolation nodes - V1 = nothing # do not need to store V1, since we specialize StartUpDG.MeshData to avoid using it. - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) + + # volume + # to match ordering of nrstJ + s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) + rq = r + sq = s + tq = t + wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) + wq = wr .* ws .* wt + Dr = kron(I_1d, I_1d, D_1d) + Ds = kron(I_1d, D_1d, I_1d) + Dt = kron(D_1d, I_1d, I_1d) + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r, s, t) + rstq = (rq, sq, tq) + Drst = (Dr, Ds, Dt) + + # face + # We do not need any face data for periodic operators. Thus, we just + # pass `nothing` to save memory. + face_vertices = ntuple(_ -> nothing, 3) + face_mask = nothing + wf = nothing + rstf = ntuple(_ -> nothing, 3) + nrstJ = ntuple(_ -> nothing, 3) + Vf = nothing + LIFT = nothing + + # low order interpolation nodes + V1 = nothing # do not need to store V1, since we specialize StartUpDG.MeshData to avoid using it. 
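+    # (passing `nothing` is safe because the interface and surface routines are
+    # no-ops for `DGMultiPeriodicFDSBP`; see the specializations near the end of
+    # this file)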
+ + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end - -function Base.show(io::IO, mime::MIME"text/plain", rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, ElementType<:StartUpDG.AbstractElemShape, ApproximationType<:AbstractDerivativeOperator} - @nospecialize rd - print(io, "RefElemData for an approximation using an ") - show(IOContext(io, :compact => true), rd.approximation_type) - print(io, " on $(rd.element_type) element") +function Base.show(io::IO, mime::MIME"text/plain", + rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, + ElementType <: + StartUpDG.AbstractElemShape, + ApproximationType <: + AbstractDerivativeOperator + } + @nospecialize rd + print(io, "RefElemData for an approximation using an ") + show(IOContext(io, :compact => true), rd.approximation_type) + print(io, " on $(rd.element_type) element") end -function Base.show(io::IO, rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, ElementType<:StartUpDG.AbstractElemShape, ApproximationType<:AbstractDerivativeOperator} - @nospecialize rd - print(io, "RefElemData{", summary(rd.approximation_type), ", ", rd.element_type, "}") +function Base.show(io::IO, + rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, + ElementType <: + StartUpDG.AbstractElemShape, + ApproximationType <: + AbstractDerivativeOperator + } + @nospecialize rd + print(io, "RefElemData{", summary(rd.approximation_type), ", ", rd.element_type, "}") end -function StartUpDG.inverse_trace_constant(rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, ElementType<:Union{Line, Quad, Hex}, ApproximationType<:AbstractDerivativeOperator} - D = rd.approximation_type - - # the inverse trace constant is the maximum eigenvalue corresponding to - # M_f * v = λ * M * v - # where M_f is the face mass matrix and M is the volume mass matrix. - # Since M is diagonal and since M_f is just the boundary "mask" matrix - # (which extracts the first and last entries of a vector), the maximum - # eigenvalue is the inverse of the first or last mass matrix diagonal. - left_weight = SummationByPartsOperators.left_boundary_weight(D) - right_weight = SummationByPartsOperators.right_boundary_weight(D) - max_eigenvalue = max(inv(left_weight), inv(right_weight)) - - # For tensor product elements, the trace constant for higher dimensional - # elements is the one-dimensional trace constant multiplied by `NDIMS`. See - # "GPU-accelerated discontinuous Galerkin methods on hybrid meshes." - # Chan, Jesse, et al (2016), https://doi.org/10.1016/j.jcp.2016.04.003 - # for more details (specifically, Appendix A.1, Theorem A.4). - return NDIMS * max_eigenvalue +function StartUpDG.inverse_trace_constant(rd::RefElemData{NDIMS, ElementType, + ApproximationType}) where {NDIMS, + ElementType <: + Union{ + Line, + Quad, + Hex + }, + ApproximationType <: + AbstractDerivativeOperator + } + D = rd.approximation_type + + # the inverse trace constant is the maximum eigenvalue corresponding to + # M_f * v = λ * M * v + # where M_f is the face mass matrix and M is the volume mass matrix. + # Since M is diagonal and since M_f is just the boundary "mask" matrix + # (which extracts the first and last entries of a vector), the maximum + # eigenvalue is the inverse of the first or last mass matrix diagonal. 
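+    # (for diagonal M and M_f, the generalized eigenvalues are simply
+    # (M_f)_ii / M_ii, which is nonzero only at the two boundary nodes)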
+ left_weight = SummationByPartsOperators.left_boundary_weight(D) + right_weight = SummationByPartsOperators.right_boundary_weight(D) + max_eigenvalue = max(inv(left_weight), inv(right_weight)) + + # For tensor product elements, the trace constant for higher dimensional + # elements is the one-dimensional trace constant multiplied by `NDIMS`. See + # "GPU-accelerated discontinuous Galerkin methods on hybrid meshes." + # Chan, Jesse, et al (2016), https://doi.org/10.1016/j.jcp.2016.04.003 + # for more details (specifically, Appendix A.1, Theorem A.4). + return NDIMS * max_eigenvalue end # type alias for specializing on a periodic SBP operator -const DGMultiPeriodicFDSBP{NDIMS, ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} where {NDIMS, ElemType, ApproxType<:SummationByPartsOperators.AbstractPeriodicDerivativeOperator, SurfaceIntegral, VolumeIntegral} - -const DGMultiFluxDiffPeriodicFDSBP{NDIMS, ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} where {NDIMS, ElemType, ApproxType<:SummationByPartsOperators.AbstractPeriodicDerivativeOperator, SurfaceIntegral<:SurfaceIntegralWeakForm, VolumeIntegral<:VolumeIntegralFluxDifferencing} +const DGMultiPeriodicFDSBP{NDIMS, ApproxType, ElemType} = DGMulti{NDIMS, ElemType, + ApproxType, + SurfaceIntegral, + VolumeIntegral + } where {NDIMS, ElemType, + ApproxType <: + SummationByPartsOperators.AbstractPeriodicDerivativeOperator, + SurfaceIntegral, + VolumeIntegral} + +const DGMultiFluxDiffPeriodicFDSBP{NDIMS, ApproxType, ElemType} = DGMulti{NDIMS, ElemType, + ApproxType, + SurfaceIntegral, + VolumeIntegral + } where {NDIMS, + ElemType, + ApproxType <: + SummationByPartsOperators.AbstractPeriodicDerivativeOperator, + SurfaceIntegral <: + SurfaceIntegralWeakForm, + VolumeIntegral <: + VolumeIntegralFluxDifferencing + } """ DGMultiMesh(dg::DGMulti) @@ -342,71 +375,72 @@ a DGMulti with `approximation_type` set to a periodic (finite difference) SBP op SummationByPartsOperators.jl. """ function DGMultiMesh(dg::DGMultiPeriodicFDSBP{NDIMS}; - coordinates_min=ntuple(_ -> -one(real(dg)), NDIMS), - coordinates_max=ntuple(_ -> one(real(dg)), NDIMS)) where {NDIMS} - - rd = dg.basis - - e = Ones{eltype(rd.r)}(size(rd.r)) - z = Zeros{eltype(rd.r)}(size(rd.r)) - - VXYZ = ntuple(_ -> [], NDIMS) - EToV = NaN # StartUpDG.jl uses size(EToV, 1) for the number of elements, this lets us reuse that. - FToF = [] - - # We need to scale the domain from `[-1, 1]^NDIMS` (default in StartUpDG.jl) - # to the given `coordinates_min, coordinates_max` - xyz = xyzq = map(copy, rd.rst) - for dim in 1:NDIMS - factor = (coordinates_max[dim] - coordinates_min[dim]) / 2 - @. 
xyz[dim] = factor * (xyz[dim] + 1) + coordinates_min[dim] - end - xyzf = ntuple(_ -> [], NDIMS) - wJq = diag(rd.M) - - # arrays of connectivity indices between face nodes - mapM = mapP = mapB = [] - - # volume geofacs Gij = dx_i/dxhat_j - coord_diffs = coordinates_max .- coordinates_min - - J_scalar = prod(coord_diffs) / 2^NDIMS - J = e * J_scalar - - if NDIMS == 1 - rxJ = J_scalar * 2 / coord_diffs[1] - rstxyzJ = @SMatrix [rxJ * e] - elseif NDIMS == 2 - rxJ = J_scalar * 2 / coord_diffs[1] - syJ = J_scalar * 2 / coord_diffs[2] - rstxyzJ = @SMatrix [rxJ * e z; z syJ * e] - elseif NDIMS == 3 - rxJ = J_scalar * 2 / coord_diffs[1] - syJ = J_scalar * 2 / coord_diffs[2] - tzJ = J_scalar * 2 / coord_diffs[3] - rstxyzJ = @SMatrix [rxJ * e z z; z syJ * e z; z z tzJ * e] - end - - # surface geofacs - nxyzJ = ntuple(_ -> [], NDIMS) - Jf = [] - - periodicity = ntuple(_ -> true, NDIMS) - - if NDIMS == 1 - mesh_type = Line() - elseif NDIMS == 2 - mesh_type = Quad() - elseif NDIMS == 3 - mesh_type = Hex() - end - - md = MeshData(StartUpDG.VertexMappedMesh(mesh_type, VXYZ, EToV), FToF, xyz, xyzf, xyzq, wJq, - mapM, mapP, mapB, rstxyzJ, J, nxyzJ, Jf, - periodicity) - - boundary_faces = [] - return DGMultiMesh{NDIMS, rd.element_type, typeof(md), typeof(boundary_faces)}(md, boundary_faces) + coordinates_min = ntuple(_ -> -one(real(dg)), NDIMS), + coordinates_max = ntuple(_ -> one(real(dg)), NDIMS)) where {NDIMS} + rd = dg.basis + + e = Ones{eltype(rd.r)}(size(rd.r)) + z = Zeros{eltype(rd.r)}(size(rd.r)) + + VXYZ = ntuple(_ -> [], NDIMS) + EToV = NaN # StartUpDG.jl uses size(EToV, 1) for the number of elements, this lets us reuse that. + FToF = [] + + # We need to scale the domain from `[-1, 1]^NDIMS` (default in StartUpDG.jl) + # to the given `coordinates_min, coordinates_max` + xyz = xyzq = map(copy, rd.rst) + for dim in 1:NDIMS + factor = (coordinates_max[dim] - coordinates_min[dim]) / 2 + @. xyz[dim] = factor * (xyz[dim] + 1) + coordinates_min[dim] + end + xyzf = ntuple(_ -> [], NDIMS) + wJq = diag(rd.M) + + # arrays of connectivity indices between face nodes + mapM = mapP = mapB = [] + + # volume geofacs Gij = dx_i/dxhat_j + coord_diffs = coordinates_max .- coordinates_min + + J_scalar = prod(coord_diffs) / 2^NDIMS + J = e * J_scalar + + if NDIMS == 1 + rxJ = J_scalar * 2 / coord_diffs[1] + rstxyzJ = @SMatrix [rxJ * e] + elseif NDIMS == 2 + rxJ = J_scalar * 2 / coord_diffs[1] + syJ = J_scalar * 2 / coord_diffs[2] + rstxyzJ = @SMatrix [rxJ*e z; z syJ*e] + elseif NDIMS == 3 + rxJ = J_scalar * 2 / coord_diffs[1] + syJ = J_scalar * 2 / coord_diffs[2] + tzJ = J_scalar * 2 / coord_diffs[3] + rstxyzJ = @SMatrix [rxJ*e z z; z syJ*e z; z z tzJ*e] + end + + # surface geofacs + nxyzJ = ntuple(_ -> [], NDIMS) + Jf = [] + + periodicity = ntuple(_ -> true, NDIMS) + + if NDIMS == 1 + mesh_type = Line() + elseif NDIMS == 2 + mesh_type = Quad() + elseif NDIMS == 3 + mesh_type = Hex() + end + + md = MeshData(StartUpDG.VertexMappedMesh(mesh_type, VXYZ, EToV), FToF, xyz, xyzf, xyzq, + wJq, + mapM, mapP, mapB, rstxyzJ, J, nxyzJ, Jf, + periodicity) + + boundary_faces = [] + return DGMultiMesh{NDIMS, rd.element_type, typeof(md), typeof(boundary_faces)}(md, + boundary_faces) end # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). @@ -414,64 +448,70 @@ end # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This is used in `estimate_dt`. `estimate_h` uses that `Jf / J = O(h^{NDIMS-1}) / O(h^{NDIMS}) = O(1/h)`. 
# However, since we do not initialize `Jf` for periodic FDSBP operators, we specialize `estimate_h` # based on the reference grid provided by SummationByPartsOperators.jl and information about the domain size # provided by `md::MeshData``. -function StartUpDG.estimate_h(e, rd::RefElemData{NDIMS, ElementType, ApproximationType}, md::MeshData) where {NDIMS, ElementType<:StartUpDG.AbstractElemShape, ApproximationType<:SummationByPartsOperators.AbstractPeriodicDerivativeOperator} - D = rd.approximation_type - x = grid(D) - - # we assume all SummationByPartsOperators.jl reference grids are rescaled to [-1, 1] - xmin = SummationByPartsOperators.xmin(D) - xmax = SummationByPartsOperators.xmax(D) - factor = 2 / (xmax - xmin) - - # If the domain has size L^NDIMS, then `minimum(md.J)^(1 / NDIMS) = L`. - # WARNING: this is not a good estimate on anisotropic grids. - return minimum(diff(x)) * factor * minimum(md.J)^(1 / NDIMS) +function StartUpDG.estimate_h(e, rd::RefElemData{NDIMS, ElementType, ApproximationType}, + md::MeshData) where {NDIMS, + ElementType <: + StartUpDG.AbstractElemShape, + ApproximationType <: + SummationByPartsOperators.AbstractPeriodicDerivativeOperator + } + D = rd.approximation_type + x = grid(D) + + # we assume all SummationByPartsOperators.jl reference grids are rescaled to [-1, 1] + xmin = SummationByPartsOperators.xmin(D) + xmax = SummationByPartsOperators.xmax(D) + factor = 2 / (xmax - xmin) + + # If the domain has size L^NDIMS, then `minimum(md.J)^(1 / NDIMS) = L`. + # WARNING: this is not a good estimate on anisotropic grids. + return minimum(diff(x)) * factor * minimum(md.J)^(1 / NDIMS) end # specialized for DGMultiPeriodicFDSBP since there are no face nodes # and thus no inverse trace constant for periodic domains. function estimate_dt(mesh::DGMultiMesh, dg::DGMultiPeriodicFDSBP) - rd = dg.basis # RefElemData - return StartUpDG.estimate_h(rd, mesh.md) + rd = dg.basis # RefElemData + return StartUpDG.estimate_h(rd, mesh.md) end # do nothing for interface terms if using a periodic operator # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::DGMultiMesh, equations, surface_integral, dg::DGMultiPeriodicFDSBP) - @assert nelements(mesh, dg, cache) == 1 - nothing + @assert nelements(mesh, dg, cache) == 1 + nothing end function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, dg::DGMultiPeriodicFDSBP) - @assert nelements(mesh, dg, cache) == 1 - nothing + @assert nelements(mesh, dg, cache) == 1 + nothing end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMultiPeriodicFDSBP, cache) - @assert nelements(mesh, dg, cache) == 1 - nothing + @assert nelements(mesh, dg, cache) == 1 + nothing end function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiffPeriodicFDSBP, RealT, uEltype) + md = mesh.md - md = mesh.md - - # storage for volume quadrature values, face quadrature values, flux values - nvars = nvariables(equations) - u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) - return (; u_values, invJ = inv.(md.J) ) + # storage for volume quadrature values, face quadrature values, flux values + nvars = nvariables(equations) + u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) + return (; u_values, invJ = inv.(md.J)) end # Specialize calc_volume_integral for periodic SBP operators (assumes the operator is sparse). 
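To see how the periodic pieces above fit together: a minimal sketch of constructing a periodic FDSBP `DGMulti` solver and the matching `DGMultiMesh`. It assumes the `DGMulti` constructor specialization for SummationByPartsOperators.jl operators (not shown in this hunk); the operator parameters are arbitrary choices.

```julia
using Trixi
using SummationByPartsOperators: periodic_derivative_operator

# Hypothetical setup: a 4th-order accurate periodic FD operator on 64 nodes
D = periodic_derivative_operator(derivative_order = 1, accuracy_order = 4,
                                 xmin = 0.0, xmax = 1.0, N = 64)
dg = DGMulti(element_type = Line(), approximation_type = D,
             surface_flux = flux_central,
             volume_integral = VolumeIntegralWeakForm())

# Calls the specialized `DGMultiMesh(dg::DGMultiPeriodicFDSBP)` constructor;
# the reference grid is rescaled to `[coordinates_min, coordinates_max]`.
mesh = DGMultiMesh(dg; coordinates_min = (-1.0,), coordinates_max = (1.0,))
```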
@@ -479,64 +519,61 @@ function calc_volume_integral!(du, u,
                                mesh::DGMultiMesh,
                                have_nonconservative_terms::False, equations,
                                volume_integral::VolumeIntegralFluxDifferencing,
                                dg::DGMultiFluxDiffPeriodicFDSBP, cache)
-
-  @unpack volume_flux = volume_integral
-
-  # We expect speedup over the serial version only when using two or more threads
-  # since the threaded version below does not exploit the symmetry properties,
-  # resulting in a performance penalty of 1/2
-  if Threads.nthreads() > 1
-
-    for dim in eachdim(mesh)
-      normal_direction = get_contravariant_vector(1, dim, mesh, cache)
-
-      # These are strong-form operators of the form `D = M \ Q` where `M` is diagonal
-      # and `Q` is skew-symmetric. Since `M` is diagonal, `inv(M)` scales the rows of `Q`.
-      # Then, `1 / M[i,i] * ∑_j Q[i,j] * volume_flux(u[i], u[j])` is equivalent to
-      # `= ∑_j (1 / M[i,i] * Q[i,j]) * volume_flux(u[i], u[j])`
-      # `= ∑_j D[i,j] * volume_flux(u[i], u[j])`
-      # TODO: DGMulti.
-      # This would have to be changed if `has_nonconservative_terms = False()`
-      # because then `volume_flux` is non-symmetric.
-      A = dg.basis.Drst[dim]
-
-      A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR
-      row_ids = axes(A, 2)
-      rows = rowvals(A_base)
-      vals = nonzeros(A_base)
-
-      @threaded for i in row_ids
-        u_i = u[i]
-        du_i = du[i]
-        for id in nzrange(A_base, i)
-          j = rows[id]
-          u_j = u[j]
-          A_ij = vals[id]
-          AF_ij = 2 * A_ij * volume_flux(u_i, u_j, normal_direction, equations)
-          du_i = du_i + AF_ij
+    @unpack volume_flux = volume_integral
+
+    # We expect speedup over the serial version only when using two or more threads
+    # since the threaded version below does not exploit the symmetry properties,
+    # resulting in a performance penalty of 1/2
+    if Threads.nthreads() > 1
+        for dim in eachdim(mesh)
+            normal_direction = get_contravariant_vector(1, dim, mesh, cache)
+
+            # These are strong-form operators of the form `D = M \ Q` where `M` is diagonal
+            # and `Q` is skew-symmetric. Since `M` is diagonal, `inv(M)` scales the rows of `Q`.
+            # Then, `1 / M[i,i] * ∑_j Q[i,j] * volume_flux(u[i], u[j])` is equivalent to
+            # `= ∑_j (1 / M[i,i] * Q[i,j]) * volume_flux(u[i], u[j])`
+            # `= ∑_j D[i,j] * volume_flux(u[i], u[j])`
+            # TODO: DGMulti.
+            # This would have to be changed if `has_nonconservative_terms = True()`
+            # because then `volume_flux` is non-symmetric.
+            A = dg.basis.Drst[dim]
+
+            A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR
+            row_ids = axes(A, 2)
+            rows = rowvals(A_base)
+            vals = nonzeros(A_base)
+
+            @threaded for i in row_ids
+                u_i = u[i]
+                du_i = du[i]
+                for id in nzrange(A_base, i)
+                    j = rows[id]
+                    u_j = u[j]
+                    A_ij = vals[id]
+                    AF_ij = 2 * A_ij *
+                            volume_flux(u_i, u_j, normal_direction, equations)
+                    du_i = du_i + AF_ij
+                end
+                du[i] = du_i
+            end
         end
-        du[i] = du_i
-      end
-    end
-  else # if using two threads or fewer
+    else # if using a single thread
-    # Calls `hadamard_sum!``, which uses symmetry to reduce flux evaluations. Symmetry
-    # is expected to yield about a 2x speedup, so we default to the symmetry-exploiting
-    # volume integral unless we have >2 threads (which should yield >2 speedup).
-    for dim in eachdim(mesh)
-      normal_direction = get_contravariant_vector(1, dim, mesh, cache)
+        # Calls `hadamard_sum!`, which uses symmetry to reduce flux evaluations. Symmetry
+        # is expected to yield about a 2x speedup, so we default to the symmetry-exploiting
+        # volume integral unless we have two or more threads (which should yield a >2x speedup).
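The symmetry argument in the comment above can be made concrete with a toy version of the pair-wise accumulation that `hadamard_sum!` performs; everything here (the operator `Q`, the stand-in flux, the data) is made up for illustration.

```julia
# Toy analogue of the symmetry-exploiting path: a skew-symmetric operator `Q`
# and a symmetric two-point "flux"; each f_ij is evaluated once and scattered
# to both rows i and j, halving the number of flux evaluations.
n = 4
Q = [0.0 0.5 0.0 0.0;
     -0.5 0.0 0.5 0.0;
     0.0 -0.5 0.0 0.5;
     0.0 0.0 -0.5 0.0]                  # skew-symmetric: Q' == -Q
u = [1.0, 2.0, 3.0, 4.0]
du = zeros(n)
volume_flux(a, b) = 0.5 * (a + b)       # symmetric stand-in flux
for i in 1:n, j in (i + 1):n
    f_ij = volume_flux(u[i], u[j])      # evaluated once per pair (i, j)
    du[i] += 2 * Q[i, j] * f_ij
    du[j] += 2 * Q[j, i] * f_ij         # reuses f_ij since Q[j, i] == -Q[i, j]
end
```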
+ for dim in eachdim(mesh) + normal_direction = get_contravariant_vector(1, dim, mesh, cache) - A = dg.basis.Drst[dim] + A = dg.basis.Drst[dim] - # since has_nonconservative_terms::False, - # the volume flux is symmetric. - flux_is_symmetric = True() - hadamard_sum!(du, A, flux_is_symmetric, volume_flux, - normal_direction, u, equations) + # since has_nonconservative_terms::False, + # the volume flux is symmetric. + flux_is_symmetric = True() + hadamard_sum!(du, A, flux_is_symmetric, volume_flux, + normal_direction, u, equations) + end end - - end end - end # @muladd diff --git a/src/solvers/dgmulti/shock_capturing.jl b/src/solvers/dgmulti/shock_capturing.jl index bbda089ee58..d224e5ed03d 100644 --- a/src/solvers/dgmulti/shock_capturing.jl +++ b/src/solvers/dgmulti/shock_capturing.jl @@ -2,155 +2,153 @@ function create_cache(mesh::DGMultiMesh{NDIMS}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGMultiFluxDiff{<:GaussSBP}, RealT, uEltype) where {NDIMS} - element_ids_dg = Int[] - element_ids_dgfv = Int[] - - # build element to element (element_to_element_connectivity) connectivity for smoothing of - # shock capturing parameters. - face_to_face_connectivity = mesh.md.FToF # num_faces x num_elements matrix - element_to_element_connectivity = similar(face_to_face_connectivity) - for e in axes(face_to_face_connectivity, 2) - for f in axes(face_to_face_connectivity, 1) - neighbor_face_index = face_to_face_connectivity[f, e] - - # reverse-engineer element index from face. Assumes all elements - # have the same number of faces. - neighbor_element_index = ((neighbor_face_index - 1) ÷ dg.basis.num_faces) + 1 - element_to_element_connectivity[f, e] = neighbor_element_index + element_ids_dg = Int[] + element_ids_dgfv = Int[] + + # build element to element (element_to_element_connectivity) connectivity for smoothing of + # shock capturing parameters. + face_to_face_connectivity = mesh.md.FToF # num_faces x num_elements matrix + element_to_element_connectivity = similar(face_to_face_connectivity) + for e in axes(face_to_face_connectivity, 2) + for f in axes(face_to_face_connectivity, 1) + neighbor_face_index = face_to_face_connectivity[f, e] + + # reverse-engineer element index from face. Assumes all elements + # have the same number of faces. + neighbor_element_index = ((neighbor_face_index - 1) ÷ dg.basis.num_faces) + 1 + element_to_element_connectivity[f, e] = neighbor_element_index + end end - end - # create sparse hybridized operators for low order scheme - Qrst, E = StartUpDG.sparse_low_order_SBP_operators(dg.basis) - Brst = map(n -> Diagonal(n .* dg.basis.wf), dg.basis.nrstJ) - sparse_hybridized_SBP_operators = map((Q, B) -> 0.5 * [Q-Q' E'*B; -B*E zeros(size(B))], Qrst, Brst) + # create sparse hybridized operators for low order scheme + Qrst, E = StartUpDG.sparse_low_order_SBP_operators(dg.basis) + Brst = map(n -> Diagonal(n .* dg.basis.wf), dg.basis.nrstJ) + sparse_hybridized_SBP_operators = map((Q, B) -> 0.5 * [Q-Q' E'*B; -B*E zeros(size(B))], + Qrst, Brst) - # Find the joint sparsity pattern of the entire matrix. We store the sparsity pattern as - # an adjoint for faster iteration through the rows. - sparsity_pattern = sum(map(A -> abs.(A)', sparse_hybridized_SBP_operators)) .> 100 * eps() + # Find the joint sparsity pattern of the entire matrix. We store the sparsity pattern as + # an adjoint for faster iteration through the rows. 
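The "store the sparsity pattern as an adjoint" trick used here (and in the periodic kernel above) relies on standard SparseArrays behavior; a self-contained sketch with a made-up skew-symmetric matrix:

```julia
using SparseArrays

# The adjoint of a SparseMatrixCSC stores each *row* of `A` contiguously,
# so `nzrange` on the parent walks one row of `A` at a time (CSR-style).
B = sparse([0.0 1.0 0.0; -1.0 0.0 1.0; 0.0 -1.0 0.0])
A = B'                       # treat `A` as a CSR matrix
A_base = parent(A)
rows = rowvals(A_base)
vals = nonzeros(A_base)
for i in axes(A, 2)          # mirrors `row_ids = axes(A, 2)` in the kernels
    for id in nzrange(A_base, i)
        j = rows[id]         # column index within row `i` of `A`
        @assert vals[id] == A[i, j]
    end
end
```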
+ sparsity_pattern = sum(map(A -> abs.(A)', sparse_hybridized_SBP_operators)) .> + 100 * eps() - return (; element_ids_dg, element_ids_dgfv, + return (; element_ids_dg, element_ids_dgfv, sparse_hybridized_SBP_operators, sparsity_pattern, element_to_element_connectivity) end - # this method is used when the indicator is constructed as for shock-capturing volume integrals function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations, - basis::RefElemData{NDIMS}) where NDIMS - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) + basis::RefElemData{NDIMS}) where {NDIMS} + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) - A = Vector{real(basis)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Vector{real(basis)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - # initialize inverse Vandermonde matrices at Gauss-Legendre nodes - (; N) = basis - lobatto_node_coordinates_1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, N) - VDM_1D = StartUpDG.vandermonde(Line(), N, lobatto_node_coordinates_1D) - inverse_vandermonde = SimpleKronecker(NDIMS, inv(VDM_1D)) + # initialize inverse Vandermonde matrices at Gauss-Legendre nodes + (; N) = basis + lobatto_node_coordinates_1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, N) + VDM_1D = StartUpDG.vandermonde(Line(), N, lobatto_node_coordinates_1D) + inverse_vandermonde = SimpleKronecker(NDIMS, inv(VDM_1D)) - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) end - function (indicator_hg::IndicatorHennemannGassner)(u, mesh::DGMultiMesh, equations, dg::DGMulti{NDIMS}, cache; kwargs...) where {NDIMS} - (; alpha_max, alpha_min, alpha_smooth, variable) = indicator_hg - (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) = indicator_hg.cache - - resize!(alpha, nelements(mesh, dg)) - if alpha_smooth - resize!(alpha_tmp, nelements(mesh, dg)) - end - - # magic parameters - threshold = 0.5 * 10^(-1.8 * (dg.basis.N + 1)^0.25) - parameter_s = log((1 - 0.0001) / 0.0001) - - @threaded for element in eachelement(mesh, dg) - indicator = indicator_threaded[Threads.threadid()] - modal_ = modal_threaded[Threads.threadid()] - - # Calculate indicator variable at interpolation (Lobatto) nodes. - # TODO: calculate indicator variables at Gauss nodes or using `cache.entropy_projected_u_values` - for i in eachnode(dg) - indicator[i] = indicator_hg.variable(u[i, element], equations) - end + (; alpha_max, alpha_min, alpha_smooth, variable) = indicator_hg + (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) = indicator_hg.cache - # multiply by invVDM::SimpleKronecker - LinearAlgebra.mul!(modal_, inverse_vandermonde, indicator) - - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - # Here, we reshape modal coefficients to expose the tensor product structure. 
- modal = Base.ReshapedArray(modal_, ntuple(_ -> dg.basis.N + 1, NDIMS), ()) - - # Calculate total energies for all modes, all modes minus the highest mode, and - # all modes without the two highest modes - total_energy = sum(x -> x^2, modal) - clip_1_ranges = ntuple(_ -> Base.OneTo(dg.basis.N), NDIMS) - clip_2_ranges = ntuple(_ -> Base.OneTo(dg.basis.N - 1), NDIMS) - # These splattings do not seem to allocate as of Julia 1.9.0? - total_energy_clip1 = sum(x -> x^2, view(modal, clip_1_ranges...)) - total_energy_clip2 = sum(x -> x^2, view(modal, clip_2_ranges...)) - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) + resize!(alpha, nelements(mesh, dg)) + if alpha_smooth + resize!(alpha_tmp, nelements(mesh, dg)) end - energy = max(energy_frac_1, energy_frac_2) - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) + # magic parameters + threshold = 0.5 * 10^(-1.8 * (dg.basis.N + 1)^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + @threaded for element in eachelement(mesh, dg) + indicator = indicator_threaded[Threads.threadid()] + modal_ = modal_threaded[Threads.threadid()] + + # Calculate indicator variable at interpolation (Lobatto) nodes. + # TODO: calculate indicator variables at Gauss nodes or using `cache.entropy_projected_u_values` + for i in eachnode(dg) + indicator[i] = indicator_hg.variable(u[i, element], equations) + end + + # multiply by invVDM::SimpleKronecker + LinearAlgebra.mul!(modal_, inverse_vandermonde, indicator) + + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Here, we reshape modal coefficients to expose the tensor product structure. + modal = Base.ReshapedArray(modal_, ntuple(_ -> dg.basis.N + 1, NDIMS), ()) + + # Calculate total energies for all modes, all modes minus the highest mode, and + # all modes without the two highest modes + total_energy = sum(x -> x^2, modal) + clip_1_ranges = ntuple(_ -> Base.OneTo(dg.basis.N), NDIMS) + clip_2_ranges = ntuple(_ -> Base.OneTo(dg.basis.N - 1), NDIMS) + # These splattings do not seem to allocate as of Julia 1.9.0? 
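A toy version of the modal-energy computation above, with made-up sizes, showing both the non-allocating `Base.ReshapedArray` write view and the per-direction clipping of the highest mode:

```julia
# Made-up sizes: polynomial degree N = 3 in NDIMS = 2 dimensions
NDIMS, N = 2, 3
modal_ = rand((N + 1)^NDIMS)              # flat vector of modal coefficients
# writes through `modal` go into `modal_` without allocating (Julia v1.9)
modal = Base.ReshapedArray(modal_, ntuple(_ -> N + 1, NDIMS), ())

total_energy = sum(x -> x^2, modal)
clip_1_ranges = ntuple(_ -> Base.OneTo(N), NDIMS)   # drop the highest mode per direction
total_energy_clip1 = sum(x -> x^2, view(modal, clip_1_ranges...))
energy_frac_1 = (total_energy - total_energy_clip1) / total_energy
```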
+            total_energy_clip1 = sum(x -> x^2, view(modal, clip_1_ranges...))
+            total_energy_clip2 = sum(x -> x^2, view(modal, clip_2_ranges...))
+
+            # Calculate energy in higher modes
+            if !(iszero(total_energy))
+                energy_frac_1 = (total_energy - total_energy_clip1) / total_energy
+            else
+                energy_frac_1 = zero(total_energy)
+            end
+            if !(iszero(total_energy_clip1))
+                energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1
+            else
+                energy_frac_2 = zero(total_energy_clip1)
+            end
+            energy = max(energy_frac_1, energy_frac_2)
+
+            alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))
+
+            # Take care of the case close to pure DG
+            if alpha_element < alpha_min
+                alpha_element = zero(alpha_element)
+            end
+
+            # Take care of the case close to pure FV
+            if alpha_element > 1 - alpha_min
+                alpha_element = one(alpha_element)
+            end
+
+            # Clip the maximum amount of FV allowed
+            alpha[element] = min(alpha_max, alpha_element)
     end
-    # Take care of the case close to pure FV
-    if alpha_element > 1 - alpha_min
-      alpha_element = one(alpha_element)
+    # smooth element indices after they're all computed
+    if alpha_smooth
+        apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache)
     end
-    # Clip the maximum amount of FV allowed
-    alpha[element] = min(alpha_max, alpha_element)
-  end
-
-  # smooth element indices after they're all computed
-  if alpha_smooth
-    apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache)
-  end
-
-  return alpha
+    return alpha
end

# Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
function apply_smoothing!(mesh::DGMultiMesh, alpha, alpha_tmp, dg::DGMulti, cache)
-  # Copy alpha values such that smoothing is indpedenent of the element access order
-  alpha_tmp .= alpha
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha

-  # smooth alpha with its neighboring value
-  for element in eachelement(mesh, dg)
-    for face in Base.OneTo(StartUpDG.num_faces(dg.basis.element_type))
-      neighboring_element = cache.element_to_element_connectivity[face, element]
-      alpha_neighbor = alpha_tmp[neighboring_element]
-      alpha[element] = max(alpha[element], 0.5 * alpha_neighbor)
+    # smooth alpha with its neighboring value
+    for element in eachelement(mesh, dg)
+        for face in Base.OneTo(StartUpDG.num_faces(dg.basis.element_type))
+            neighboring_element = cache.element_to_element_connectivity[face, element]
+            alpha_neighbor = alpha_tmp[neighboring_element]
+            alpha[element] = max(alpha[element], 0.5 * alpha_neighbor)
+        end
     end
-  end
-
end

# pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache)
@@ -160,20 +158,20 @@ end
# `element_ids_dgfv` with the IDs of elements using a blended DG-FV scheme.
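The 50% diffusion rule in `apply_smoothing!` above can be replayed on a hypothetical periodic chain of four elements (the connectivity is hand-rolled here, not taken from `cache`):

```julia
# Hand-rolled periodic connectivity on four elements; `alpha_tmp` is copied
# first so the result does not depend on the traversal order.
alpha = [0.0, 0.8, 0.0, 0.0]
alpha_tmp = copy(alpha)
n = length(alpha)
for element in 1:n
    for neighbor in (mod1(element - 1, n), mod1(element + 1, n))
        alpha[element] = max(alpha[element], 0.5 * alpha_tmp[neighbor])
    end
end
@assert alpha == [0.4, 0.8, 0.4, 0.0]   # the spike diffuses into its neighbors
```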
function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh::DGMultiMesh, dg::DGMulti) - empty!(element_ids_dg) - empty!(element_ids_dgfv) - - for element in eachelement(mesh, dg) - # Clip blending factor for values close to zero (-> pure DG) - dg_only = isapprox(alpha[element], 0, atol=1e-12) - if dg_only - push!(element_ids_dg, element) - else - push!(element_ids_dgfv, element) + empty!(element_ids_dg) + empty!(element_ids_dgfv) + + for element in eachelement(mesh, dg) + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha[element], 0, atol = 1e-12) + if dg_only + push!(element_ids_dg, element) + else + push!(element_ids_dgfv, element) + end end - end - return nothing + return nothing end function calc_volume_integral!(du, u, @@ -181,167 +179,177 @@ function calc_volume_integral!(du, u, have_nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGMultiFluxDiff, cache) + (; element_ids_dg, element_ids_dgfv) = cache + (; volume_flux_dg, volume_flux_fv, indicator) = volume_integral - (; element_ids_dg, element_ids_dgfv) = cache - (; volume_flux_dg, volume_flux_fv, indicator) = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh, dg) + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh, dg) - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, - equations, volume_flux_dg, dg, cache) - end - - # Loop over blended DG-FV elements, blend the high and low order RHS contributions - # via `rhs_high * (1 - alpha) + rhs_low * (alpha)`. - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] - - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + equations, volume_flux_dg, dg, cache) + end - # Calculate "FV" low order volume integral contribution - low_order_flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_fv, dg, cache, alpha_element) - end + # Loop over blended DG-FV elements, blend the high and low order RHS contributions + # via `rhs_high * (1 - alpha) + rhs_low * (alpha)`. 
+ @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] + + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) + + # Calculate "FV" low order volume integral contribution + low_order_flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, alpha_element) + end - return nothing + return nothing end -get_sparse_operator_entries(i, j, mesh::DGMultiMesh{1}, cache) = - SVector(cache.sparse_hybridized_SBP_operators[1][i, j]) +function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{1}, cache) + SVector(cache.sparse_hybridized_SBP_operators[1][i, j]) +end function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{2}, cache) - Qr, Qs = cache.sparse_hybridized_SBP_operators - return SVector(Qr[i, j], Qs[i, j]) + Qr, Qs = cache.sparse_hybridized_SBP_operators + return SVector(Qr[i, j], Qs[i, j]) end function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{3}, cache) - Qr, Qs, Qt = cache.sparse_hybridized_SBP_operators - return SVector(Qr[i, j], Qs[i, j], Qt[i, j]) + Qr, Qs, Qt = cache.sparse_hybridized_SBP_operators + return SVector(Qr[i, j], Qs[i, j], Qt[i, j]) end -get_contravariant_matrix(element, mesh::DGMultiMesh{1}, cache) = - SMatrix{1, 1}(cache.dxidxhatj[1, 1][1, element]) +function get_contravariant_matrix(element, mesh::DGMultiMesh{1}, cache) + SMatrix{1, 1}(cache.dxidxhatj[1, 1][1, element]) +end function get_contravariant_matrix(element, mesh::DGMultiMesh{2, <:Affine}, cache) - (; dxidxhatj) = cache - return SMatrix{2, 2}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], - dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element]) + (; dxidxhatj) = cache + return SMatrix{2, 2}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], + dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element]) end function get_contravariant_matrix(element, mesh::DGMultiMesh{3, <:Affine}, cache) - (; dxidxhatj) = cache - return SMatrix{3, 3}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], dxidxhatj[3, 1][1, element], - dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element], dxidxhatj[3, 2][1, element], - dxidxhatj[1, 3][1, element], dxidxhatj[2, 3][1, element], dxidxhatj[3, 3][1, element]) + (; dxidxhatj) = cache + return SMatrix{3, 3}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], + dxidxhatj[3, 1][1, element], + dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element], + dxidxhatj[3, 2][1, element], + dxidxhatj[1, 3][1, element], dxidxhatj[2, 3][1, element], + dxidxhatj[3, 3][1, element]) end function get_contravariant_matrix(i, element, mesh::DGMultiMesh{2}, cache) - (; dxidxhatj) = cache - return SMatrix{2, 2}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], - dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element]) + (; dxidxhatj) = cache + return SMatrix{2, 2}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], + dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element]) end function get_contravariant_matrix(i, element, mesh::DGMultiMesh{3}, cache) - (; dxidxhatj) = cache - return SMatrix{3, 3}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], dxidxhatj[3, 1][i, element], - dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element], dxidxhatj[3, 2][i, element], - dxidxhatj[1, 3][i, element], dxidxhatj[2, 3][i, element], dxidxhatj[3, 
3][i, element]) + (; dxidxhatj) = cache + return SMatrix{3, 3}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], + dxidxhatj[3, 1][i, element], + dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element], + dxidxhatj[3, 2][i, element], + dxidxhatj[1, 3][i, element], dxidxhatj[2, 3][i, element], + dxidxhatj[3, 3][i, element]) end -get_avg_contravariant_matrix(i, j, element, mesh::DGMultiMesh, cache) = - 0.5 * (get_contravariant_matrix(i, element, mesh, cache) + get_contravariant_matrix(j, element, mesh, cache)) +function get_avg_contravariant_matrix(i, j, element, mesh::DGMultiMesh, cache) + 0.5 * (get_contravariant_matrix(i, element, mesh, cache) + + get_contravariant_matrix(j, element, mesh, cache)) +end # computes an algebraic low order method with internal dissipation. # This method is for affine/Cartesian meshes function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, - volume_flux_fv, dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha=true) - - # accumulates output from flux differencing - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - fill!(rhs_local, zero(eltype(rhs_local))) - - u_local = view(cache.entropy_projected_u_values, :, element) - - # constant over each element - geometric_matrix = get_contravariant_matrix(element, mesh, cache) - - (; sparsity_pattern) = cache - A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) - for i in row_ids - u_i = u_local[i] - du_i = zero(u_i) - for id in nzrange(A_base, i) - j = rows[id] - u_j = u_local[j] - - # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j - reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) - normal_direction_ij = geometric_matrix * reference_operator_entries - - # note that we do not need to normalize `normal_direction_ij` since - # it is typically normalized within the flux computation. - f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) - du_i = du_i + 2 * f_ij + volume_flux_fv, + dg::DGMultiFluxDiff{<:GaussSBP}, + cache, alpha = true) + + # accumulates output from flux differencing + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + fill!(rhs_local, zero(eltype(rhs_local))) + + u_local = view(cache.entropy_projected_u_values, :, element) + + # constant over each element + geometric_matrix = get_contravariant_matrix(element, mesh, cache) + + (; sparsity_pattern) = cache + A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) + for i in row_ids + u_i = u_local[i] + du_i = zero(u_i) + for id in nzrange(A_base, i) + j = rows[id] + u_j = u_local[j] + + # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j + reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) + normal_direction_ij = geometric_matrix * reference_operator_entries + + # note that we do not need to normalize `normal_direction_ij` since + # it is typically normalized within the flux computation. + f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + 2 * f_ij + end + rhs_local[i] = du_i end - rhs_local[i] = du_i - end - - # TODO: factor this out to avoid calling it twice during calc_volume_integral! 
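The direction vector assembled in the low-order kernels is just the per-element geometric matrix applied to the reference operator entries; a sketch with made-up numbers:

```julia
using StaticArrays

# Affine 2D element: constant geometric matrix G[i, j] = dx_i/dx̂_j (made up)
geometric_matrix = @SMatrix [2.0 0.0;
                             0.0 0.5]
# hypothetical sparse operator entries (Q_r[i, j], Q_s[i, j])
reference_operator_entries = SVector(0.25, -0.1)
normal_direction_ij = geometric_matrix * reference_operator_entries
# == SVector(0.5, -0.05); passed to `volume_flux_fv` without normalization
```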
- project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + # TODO: factor this out to avoid calling it twice during calc_volume_integral! + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) end -function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh{NDIMS, <:NonAffine}, +function low_order_flux_differencing_kernel!(du, u, element, + mesh::DGMultiMesh{NDIMS, <:NonAffine}, have_nonconservative_terms::False, equations, - volume_flux_fv, dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha=true) where {NDIMS} - - # accumulates output from flux differencing - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - fill!(rhs_local, zero(eltype(rhs_local))) - - u_local = view(cache.entropy_projected_u_values, :, element) - - (; sparsity_pattern) = cache - A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) - for i in row_ids - u_i = u_local[i] - du_i = zero(u_i) - for id in nzrange(A_base, i) - j = rows[id] - u_j = u_local[j] - - # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j - geometric_matrix = get_avg_contravariant_matrix(i, j, element, mesh, cache) - reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) - normal_direction_ij = geometric_matrix * reference_operator_entries - - # note that we do not need to normalize `normal_direction_ij` since - # it is typically normalized within the flux computation. - f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) - du_i = du_i + 2 * f_ij + volume_flux_fv, + dg::DGMultiFluxDiff{<:GaussSBP}, + cache, alpha = true) where {NDIMS} + + # accumulates output from flux differencing + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + fill!(rhs_local, zero(eltype(rhs_local))) + + u_local = view(cache.entropy_projected_u_values, :, element) + + (; sparsity_pattern) = cache + A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) + for i in row_ids + u_i = u_local[i] + du_i = zero(u_i) + for id in nzrange(A_base, i) + j = rows[id] + u_j = u_local[j] + + # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j + geometric_matrix = get_avg_contravariant_matrix(i, j, element, mesh, cache) + reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) + normal_direction_ij = geometric_matrix * reference_operator_entries + + # note that we do not need to normalize `normal_direction_ij` since + # it is typically normalized within the flux computation. + f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + 2 * f_ij + end + rhs_local[i] = du_i end - rhs_local[i] = du_i - end - - # TODO: factor this out to avoid calling it twice during calc_volume_integral! - project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + # TODO: factor this out to avoid calling it twice during calc_volume_integral! + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) end - diff --git a/src/solvers/dgmulti/types.jl b/src/solvers/dgmulti/types.jl index c452ed67b2e..c225e334e8e 100644 --- a/src/solvers/dgmulti/types.jl +++ b/src/solvers/dgmulti/types.jl @@ -4,32 +4,62 @@ # `DGMulti` refers to both multiple DG types (polynomial/SBP, simplices/quads/hexes) as well as # the use of multi-dimensional operators in the solver. 
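For orientation, the `DGMulti` keyword constructor that is reformatted below is typically invoked along these lines (the degree, element type, and flux are arbitrary choices, not prescribed by this diff):

```julia
using Trixi

dg = DGMulti(polydeg = 3, element_type = Tri(),
             approximation_type = Polynomial(),
             surface_flux = flux_lax_friedrichs,
             volume_integral = VolumeIntegralWeakForm())
# uniform mesh constructor, also reformatted in this file
mesh = DGMultiMesh(dg, (8, 8); periodicity = (true, true))
```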
-const DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} = - DG{<:RefElemData{NDIMS, ElemType, ApproxType}, Mortar, SurfaceIntegral, VolumeIntegral} where {Mortar} +const DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} = DG{ + <:RefElemData{ + NDIMS, + ElemType, + ApproxType + }, + Mortar, + SurfaceIntegral, + VolumeIntegral + } where { + Mortar + } # Type aliases. The first parameter is `ApproxType` since it is more commonly used for dispatch. -const DGMultiWeakForm{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:VolumeIntegralWeakForm} where {NDIMS} - -const DGMultiFluxDiff{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:Union{VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG}} where {NDIMS} - -const DGMultiFluxDiffSBP{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:Union{VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG}} where {NDIMS, ApproxType<:Union{SBP, AbstractDerivativeOperator}} - -const DGMultiSBP{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} where {NDIMS, ElemType, ApproxType<:Union{SBP, AbstractDerivativeOperator}, SurfaceIntegral, VolumeIntegral} - +const DGMultiWeakForm{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + <:SurfaceIntegralWeakForm, + <:VolumeIntegralWeakForm + } where {NDIMS} + +const DGMultiFluxDiff{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + <:SurfaceIntegralWeakForm, + <:Union{ + VolumeIntegralFluxDifferencing, + VolumeIntegralShockCapturingHG + }} where {NDIMS} + +const DGMultiFluxDiffSBP{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + <:SurfaceIntegralWeakForm, + <:Union{ + VolumeIntegralFluxDifferencing, + VolumeIntegralShockCapturingHG + } + } where {NDIMS, + ApproxType <: Union{SBP, + AbstractDerivativeOperator + }} + +const DGMultiSBP{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + SurfaceIntegral, VolumeIntegral + } where {NDIMS, ElemType, + ApproxType <: Union{SBP, + AbstractDerivativeOperator}, + SurfaceIntegral, VolumeIntegral} # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). # Since these FMAs can increase the performance of many numerical algorithms, # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # these are necessary for pretty printing polydeg(dg::DGMulti) = dg.basis.N -Base.summary(io::IO, dg::DG) where {DG <: DGMulti} = print(io, "DGMulti(polydeg=$(polydeg(dg)))") +function Base.summary(io::IO, dg::DG) where {DG <: DGMulti} + print(io, "DGMulti(polydeg=$(polydeg(dg)))") +end # real(rd) is the eltype of the nodes `rd.r`. Base.real(rd::RefElemData) = eltype(rd.r) @@ -53,17 +83,17 @@ Optional: - `RefElemData_kwargs` are additional keyword arguments for `RefElemData`, such as `quad_rule_vol`. For more info, see the [StartUpDG.jl docs](https://jlchan.github.io/StartUpDG.jl/dev/). """ -function DGMulti(; polydeg=nothing, - element_type::AbstractElemShape, - approximation_type=Polynomial(), - surface_flux=flux_central, - surface_integral=SurfaceIntegralWeakForm(surface_flux), - volume_integral=VolumeIntegralWeakForm(), - kwargs...) - - # call dispatchable constructor - DGMulti(element_type, approximation_type, volume_integral, surface_integral; - polydeg=polydeg, kwargs...) 
+function DGMulti(; polydeg = nothing, + element_type::AbstractElemShape, + approximation_type = Polynomial(), + surface_flux = flux_central, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralWeakForm(), + kwargs...) + + # call dispatchable constructor + DGMulti(element_type, approximation_type, volume_integral, surface_integral; + polydeg = polydeg, kwargs...) end # dispatchable constructor for DGMulti to allow for specialization @@ -73,13 +103,15 @@ function DGMulti(element_type::AbstractElemShape, surface_integral; polydeg::Integer, kwargs...) - - rd = RefElemData(element_type, approximation_type, polydeg; kwargs...) - return DG(rd, nothing #= mortar =#, surface_integral, volume_integral) + rd = RefElemData(element_type, approximation_type, polydeg; kwargs...) + # `nothing` is passed as `mortar` + return DG(rd, nothing, surface_integral, volume_integral) end -DGMulti(basis::RefElemData; volume_integral, surface_integral) = - DG(basis, nothing #= mortar =#, surface_integral, volume_integral) +function DGMulti(basis::RefElemData; volume_integral, surface_integral) + # `nothing` is passed as `mortar` + DG(basis, nothing, surface_integral, volume_integral) +end """ DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs...) @@ -90,9 +122,10 @@ Constructs a basis for DGMulti solvers. Returns a "StartUpDG.RefElemData" object For more info, see the [StartUpDG.jl docs](https://jlchan.github.io/StartUpDG.jl/dev/). """ -DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs...) = - RefElemData(element_type, approximation_type, polydeg; kwargs...) - +function DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), + kwargs...) + RefElemData(element_type, approximation_type, polydeg; kwargs...) +end ######################################## # DGMultiMesh @@ -100,8 +133,10 @@ DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs... 
# now that `DGMulti` is defined, we can define constructors for `DGMultiMesh` which use `dg::DGMulti` -function DGMultiMesh(dg::DGMulti, geometric_term_type, md::MeshData{NDIMS}, boundary_faces) where {NDIMS} - return DGMultiMesh{NDIMS, typeof(geometric_term_type), typeof(md), typeof(boundary_faces)}(md, boundary_faces) +function DGMultiMesh(dg::DGMulti, geometric_term_type, md::MeshData{NDIMS}, + boundary_faces) where {NDIMS} + return DGMultiMesh{NDIMS, typeof(geometric_term_type), typeof(md), + typeof(boundary_faces)}(md, boundary_faces) end # Mesh types used internally for trait dispatch @@ -115,11 +150,15 @@ struct Affine <: GeometricTermsType end # mesh produces constant geometric terms struct NonAffine <: GeometricTermsType end # mesh produces non-constant geometric terms # choose MeshType based on the constructor and element type -GeometricTermsType(mesh_type, dg::DGMulti) = GeometricTermsType(mesh_type, dg.basis.element_type) +function GeometricTermsType(mesh_type, dg::DGMulti) + GeometricTermsType(mesh_type, dg.basis.element_type) +end GeometricTermsType(mesh_type::Cartesian, element_type::AbstractElemShape) = Affine() GeometricTermsType(mesh_type::TriangulateIO, element_type::Tri) = Affine() GeometricTermsType(mesh_type::VertexMapped, element_type::Union{Tri, Tet}) = Affine() -GeometricTermsType(mesh_type::VertexMapped, element_type::Union{Quad, Hex}) = NonAffine() +function GeometricTermsType(mesh_type::VertexMapped, element_type::Union{Quad, Hex}) + NonAffine() +end GeometricTermsType(mesh_type::Curved, element_type::AbstractElemShape) = NonAffine() # other potential constructor types to add later: Bilinear, Isoparametric{polydeg_geo}, Rational/Exact? @@ -139,17 +178,16 @@ GeometricTermsType(mesh_type::Curved, element_type::AbstractElemShape) = NonAffi (x,y,z) direction. """ function DGMultiMesh(dg::DGMulti{NDIMS}, vertex_coordinates, EToV::AbstractArray; - is_on_boundary=nothing, - periodicity=ntuple(_->false, NDIMS), kwargs...) where {NDIMS} - - md = MeshData(vertex_coordinates, EToV, dg.basis) - if NDIMS == 1 - md = StartUpDG.make_periodic(md, periodicity...) - else - md = StartUpDG.make_periodic(md, periodicity) - end - boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) - return DGMultiMesh(dg, GeometricTermsType(VertexMapped(), dg), md, boundary_faces) + is_on_boundary = nothing, + periodicity = ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} + md = MeshData(vertex_coordinates, EToV, dg.basis) + if NDIMS == 1 + md = StartUpDG.make_periodic(md, periodicity...) + else + md = StartUpDG.make_periodic(md, periodicity) + end + boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) + return DGMultiMesh(dg, GeometricTermsType(VertexMapped(), dg), md, boundary_faces) end """ @@ -161,13 +199,15 @@ end - `boundary_dict` is a `Dict{Symbol, Int}` which associates each integer `TriangulateIO` boundary tag with a `Symbol`. 
""" -function DGMultiMesh(dg::DGMulti{2, Tri}, triangulateIO, boundary_dict::Dict{Symbol, Int}; - periodicity=(false, false)) - vertex_coordinates, EToV = StartUpDG.triangulateIO_to_VXYEToV(triangulateIO) - md = MeshData(vertex_coordinates, EToV, dg.basis) - md = StartUpDG.make_periodic(md, periodicity) - boundary_faces = StartUpDG.tag_boundary_faces(triangulateIO, dg.basis, md, boundary_dict) - return DGMultiMesh(dg, GeometricTermsType(TriangulateIO(), dg), md, boundary_faces) +function DGMultiMesh(dg::DGMulti{2, Tri}, triangulateIO, + boundary_dict::Dict{Symbol, Int}; + periodicity = (false, false)) + vertex_coordinates, EToV = StartUpDG.triangulateIO_to_VXYEToV(triangulateIO) + md = MeshData(vertex_coordinates, EToV, dg.basis) + md = StartUpDG.make_periodic(md, periodicity) + boundary_faces = StartUpDG.tag_boundary_faces(triangulateIO, dg.basis, md, + boundary_dict) + return DGMultiMesh(dg, GeometricTermsType(TriangulateIO(), dg), md, boundary_faces) end """ @@ -182,26 +222,27 @@ the tensor product of the intervals `[coordinates_min[i], coordinates_max[i]]`. - `periodicity` is a tuple of `Bool`s specifying periodicity = `true`/`false` in the (x,y,z) direction. """ function DGMultiMesh(dg::DGMulti{NDIMS}, cells_per_dimension; - coordinates_min=ntuple(_ -> -one(real(dg)), NDIMS), - coordinates_max=ntuple(_ -> one(real(dg)), NDIMS), - is_on_boundary=nothing, - periodicity=ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} - - vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, cells_per_dimension...) - domain_lengths = coordinates_max .- coordinates_min - for i in 1:NDIMS - @. vertex_coordinates[i] = 0.5 * (vertex_coordinates[i] + 1) * domain_lengths[i] + coordinates_min[i] - end - - md = MeshData(vertex_coordinates, EToV, dg.basis) - if NDIMS == 1 && first(periodicity) == true - md = StartUpDG.make_periodic(md) - end - if NDIMS > 1 - md = StartUpDG.make_periodic(md, periodicity) - end - boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) - return DGMultiMesh(dg, GeometricTermsType(Cartesian(), dg), md, boundary_faces) + coordinates_min = ntuple(_ -> -one(real(dg)), NDIMS), + coordinates_max = ntuple(_ -> one(real(dg)), NDIMS), + is_on_boundary = nothing, + periodicity = ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} + vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, + cells_per_dimension...) + domain_lengths = coordinates_max .- coordinates_min + for i in 1:NDIMS + @. vertex_coordinates[i] = 0.5 * (vertex_coordinates[i] + 1) * + domain_lengths[i] + coordinates_min[i] + end + + md = MeshData(vertex_coordinates, EToV, dg.basis) + if NDIMS == 1 && first(periodicity) == true + md = StartUpDG.make_periodic(md) + end + if NDIMS > 1 + md = StartUpDG.make_periodic(md, periodicity) + end + boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) + return DGMultiMesh(dg, GeometricTermsType(Cartesian(), dg), md, boundary_faces) end """ @@ -216,22 +257,23 @@ Constructs a `Curved()` [`DGMultiMesh`](@ref) with element type `dg.basis.elemen - `periodicity` is a tuple of `Bool`s specifying periodicity = `true`/`false` in the (x,y,z) direction. """ function DGMultiMesh(dg::DGMulti{NDIMS}, cells_per_dimension, mapping; - is_on_boundary=nothing, - periodicity=ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} - - vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, cells_per_dimension...) - md = MeshData(vertex_coordinates, EToV, dg.basis) - md = NDIMS==1 ? StartUpDG.make_periodic(md, periodicity...) 
: StartUpDG.make_periodic(md, periodicity) - - @unpack xyz = md - for i in eachindex(xyz[1]) - new_xyz = mapping(getindex.(xyz, i)...) - setindex!.(xyz, new_xyz, i) - end - md_curved = MeshData(dg.basis, md, xyz...) - - boundary_faces = StartUpDG.tag_boundary_faces(md_curved, is_on_boundary) - return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md_curved, boundary_faces) + is_on_boundary = nothing, + periodicity = ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} + vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, + cells_per_dimension...) + md = MeshData(vertex_coordinates, EToV, dg.basis) + md = NDIMS == 1 ? StartUpDG.make_periodic(md, periodicity...) : + StartUpDG.make_periodic(md, periodicity) + + @unpack xyz = md + for i in eachindex(xyz[1]) + new_xyz = mapping(getindex.(xyz, i)...) + setindex!.(xyz, new_xyz, i) + end + md_curved = MeshData(dg.basis, md, xyz...) + + boundary_faces = StartUpDG.tag_boundary_faces(md_curved, is_on_boundary) + return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md_curved, boundary_faces) end """ @@ -243,144 +285,148 @@ end [HOHQMesh](https://github.com/trixi-framework/HOHQMesh). """ function DGMultiMesh(dg::DGMulti{NDIMS}, filename::String; - periodicity=ntuple(_ -> false, NDIMS)) where {NDIMS} - - hohqmesh_data = StartUpDG.read_HOHQMesh(filename) - md = MeshData(hohqmesh_data, dg.basis) - md = StartUpDG.make_periodic(md, periodicity) - boundary_faces = Dict(Pair.(keys(md.mesh_type.boundary_faces), values(md.mesh_type.boundary_faces))) - return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md, boundary_faces) + periodicity = ntuple(_ -> false, NDIMS)) where {NDIMS} + hohqmesh_data = StartUpDG.read_HOHQMesh(filename) + md = MeshData(hohqmesh_data, dg.basis) + md = StartUpDG.make_periodic(md, periodicity) + boundary_faces = Dict(Pair.(keys(md.mesh_type.boundary_faces), + values(md.mesh_type.boundary_faces))) + return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md, boundary_faces) end # Matrix type for lazy construction of physical differentiation matrices # Constructs a lazy linear combination of B = ∑_i coeffs[i] * A[i] -struct LazyMatrixLinearCombo{Tcoeffs, N, Tv, TA <: AbstractMatrix{Tv}} <: AbstractMatrix{Tv} - matrices::NTuple{N, TA} - coeffs::NTuple{N, Tcoeffs} - function LazyMatrixLinearCombo(matrices, coeffs) - @assert all(matrix -> size(matrix) == size(first(matrices)), matrices) - new{typeof(first(coeffs)), length(matrices), eltype(first(matrices)), typeof(first(matrices))}(matrices, coeffs) - end +struct LazyMatrixLinearCombo{Tcoeffs, N, Tv, TA <: AbstractMatrix{Tv}} <: + AbstractMatrix{Tv} + matrices::NTuple{N, TA} + coeffs::NTuple{N, Tcoeffs} + function LazyMatrixLinearCombo(matrices, coeffs) + @assert all(matrix -> size(matrix) == size(first(matrices)), matrices) + new{typeof(first(coeffs)), length(matrices), eltype(first(matrices)), + typeof(first(matrices))}(matrices, coeffs) + end end Base.eltype(A::LazyMatrixLinearCombo) = eltype(first(A.matrices)) Base.IndexStyle(A::LazyMatrixLinearCombo) = IndexCartesian() Base.size(A::LazyMatrixLinearCombo) = size(first(A.matrices)) @inline function Base.getindex(A::LazyMatrixLinearCombo{<:Real, N}, i, j) where {N} - val = zero(eltype(A)) - for k in Base.OneTo(N) - val = val + A.coeffs[k] * getindex(A.matrices[k], i, j) - end - return val + val = zero(eltype(A)) + for k in Base.OneTo(N) + val = val + A.coeffs[k] * getindex(A.matrices[k], i, j) + end + return val end # `SimpleKronecker` lazily stores a Kronecker product `kron(ntuple(A, NDIMS)...)`. 
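Before the `SimpleKronecker` details: the `LazyMatrixLinearCombo` defined above combines entries on access instead of materializing the sum. A small sketch with made-up operators (the type is internal, hence the `Trixi.` qualifier):

```julia
# Entries of B = 2*Dr + 3*Ds are computed on access; the sum is never stored.
Dr = [0.0 1.0; -1.0 0.0]
Ds = [0.0 2.0; -2.0 0.0]
B = Trixi.LazyMatrixLinearCombo((Dr, Ds), (2.0, 3.0))   # internal, unexported
@assert B[1, 2] == 2 * Dr[1, 2] + 3 * Ds[1, 2]
```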
# This object also allocates some temporary storage to enable the fast computation # of matrix-vector products. struct SimpleKronecker{NDIMS, TA, Ttmp} - A::TA - tmp_storage::Ttmp # temporary array used for Kronecker multiplication + A::TA + tmp_storage::Ttmp # temporary array used for Kronecker multiplication end # constructor for SimpleKronecker which requires specifying only `NDIMS` and # the 1D matrix `A`. -function SimpleKronecker(NDIMS, A, eltype_A=eltype(A)) - @assert size(A, 1) == size(A, 2) # check if square - tmp_storage=[zeros(eltype_A, ntuple(_ -> size(A, 2), NDIMS)...) for _ in 1:Threads.nthreads()] - return SimpleKronecker{NDIMS, typeof(A), typeof(tmp_storage)}(A, tmp_storage) +function SimpleKronecker(NDIMS, A, eltype_A = eltype(A)) + @assert size(A, 1) == size(A, 2) # check if square + tmp_storage = [zeros(eltype_A, ntuple(_ -> size(A, 2), NDIMS)...) + for _ in 1:Threads.nthreads()] + return SimpleKronecker{NDIMS, typeof(A), typeof(tmp_storage)}(A, tmp_storage) end # Computes `b = kron(A, A) * x` in an optimized fashion function LinearAlgebra.mul!(b_in, A_kronecker::SimpleKronecker{2}, x_in) - - @unpack A = A_kronecker - tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] - n = size(A, 2) - - # copy `x_in` to `tmp_storage` to avoid mutating the input - @assert length(tmp_storage) == length(x_in) - @turbo thread=true for i in eachindex(tmp_storage) - tmp_storage[i] = x_in[i] - end - x = reshape(tmp_storage, n, n) - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - b = Base.ReshapedArray(b_in, (n, n), ()) - - @turbo thread=true for j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for ii in 1:n - tmp = tmp + A[i, ii] * x[ii, j] + @unpack A = A_kronecker + tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] + n = size(A, 2) + + # copy `x_in` to `tmp_storage` to avoid mutating the input + @assert length(tmp_storage) == length(x_in) + @turbo thread=true for i in eachindex(tmp_storage) + tmp_storage[i] = x_in[i] + end + x = reshape(tmp_storage, n, n) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. 
+ b = Base.ReshapedArray(b_in, (n, n), ()) + + @turbo thread=true for j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for ii in 1:n + tmp = tmp + A[i, ii] * x[ii, j] + end + b[i, j] = tmp end - b[i, j] = tmp - end - @turbo thread=true for j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for jj in 1:n - tmp = tmp + A[j, jj] * b[i, jj] + @turbo thread=true for j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for jj in 1:n + tmp = tmp + A[j, jj] * b[i, jj] + end + x[i, j] = tmp end - x[i, j] = tmp - end - @turbo thread=true for i in eachindex(b_in) - b_in[i] = x[i] - end + @turbo thread=true for i in eachindex(b_in) + b_in[i] = x[i] + end - return nothing + return nothing end # Computes `b = kron(A, A, A) * x` in an optimized fashion function LinearAlgebra.mul!(b_in, A_kronecker::SimpleKronecker{3}, x_in) + @unpack A = A_kronecker + tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] + n = size(A, 2) - @unpack A = A_kronecker - tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] - n = size(A, 2) - - # copy `x_in` to `tmp_storage` to avoid mutating the input - @turbo thread=true for i in eachindex(tmp_storage) - tmp_storage[i] = x_in[i] - end - x = reshape(tmp_storage, n, n, n) - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - b = Base.ReshapedArray(b_in, (n, n, n), ()) - - @turbo thread=true for k in 1:n, j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for ii in 1:n - tmp = tmp + A[i, ii] * x[ii, j, k] + # copy `x_in` to `tmp_storage` to avoid mutating the input + @turbo thread=true for i in eachindex(tmp_storage) + tmp_storage[i] = x_in[i] + end + x = reshape(tmp_storage, n, n, n) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + b = Base.ReshapedArray(b_in, (n, n, n), ()) + + @turbo thread=true for k in 1:n, j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for ii in 1:n + tmp = tmp + A[i, ii] * x[ii, j, k] + end + b[i, j, k] = tmp end - b[i, j, k] = tmp - end - @turbo thread=true for k in 1:n, j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for jj in 1:n - tmp = tmp + A[j, jj] * b[i, jj, k] + @turbo thread=true for k in 1:n, j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for jj in 1:n + tmp = tmp + A[j, jj] * b[i, jj, k] + end + x[i, j, k] = tmp end - x[i, j, k] = tmp - end - @turbo thread=true for k in 1:n, j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for kk in 1:n - tmp = tmp + A[k, kk] * x[i, j, kk] + @turbo thread=true for k in 1:n, j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for kk in 1:n + tmp = tmp + A[k, kk] * x[i, j, kk] + end + b[i, j, k] = tmp end - b[i, j, k] = tmp - end - return nothing + return nothing end - end # @muladd # TODO: deprecations introduced in Trixi.jl v0.6 -@deprecate DGMultiMesh(dg::DGMulti{NDIMS}; cells_per_dimension, kwargs...) where {NDIMS} DGMultiMesh(dg, cells_per_dimension; kwargs...) +@deprecate DGMultiMesh(dg::DGMulti{NDIMS}; cells_per_dimension, kwargs...) where {NDIMS} DGMultiMesh(dg, + cells_per_dimension; + kwargs...) # TODO: deprecations introduced in Trixi.jl v0.5 -@deprecate DGMultiMesh(vertex_coordinates, EToV, dg::DGMulti{NDIMS}; kwargs...) where {NDIMS} DGMultiMesh(dg, vertex_coordinates, EToV; kwargs...) 
-@deprecate DGMultiMesh(triangulateIO, dg::DGMulti{2, Tri}, boundary_dict::Dict{Symbol, Int}; kwargs...) DGMultiMesh(dg, triangulateIO, boundary_dict; kwargs...) - +@deprecate DGMultiMesh(vertex_coordinates, EToV, dg::DGMulti{NDIMS}; + kwargs...) where {NDIMS} DGMultiMesh(dg, vertex_coordinates, EToV; + kwargs...) +@deprecate DGMultiMesh(triangulateIO, dg::DGMulti{2, Tri}, boundary_dict::Dict{Symbol, Int}; + kwargs...) DGMultiMesh(dg, triangulateIO, boundary_dict; kwargs...) diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index d34b0275da1..1b4e5446e44 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -3,102 +3,116 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ LobattoLegendreBasis([RealT=Float64,] polydeg::Integer) Create a nodal Lobatto-Legendre basis for polynomials of degree `polydeg`. -""" -struct LobattoLegendreBasis{RealT<:Real, NNODES, - VectorT<:AbstractVector{RealT}, - InverseVandermondeLegendre<:AbstractMatrix{RealT}, - BoundaryMatrix<:AbstractMatrix{RealT}, - DerivativeMatrix<:AbstractMatrix{RealT}} <: AbstractBasisSBP{RealT} - nodes ::VectorT - weights ::VectorT - inverse_weights::VectorT - - inverse_vandermonde_legendre::InverseVandermondeLegendre - boundary_interpolation ::BoundaryMatrix # lhat - derivative_matrix ::DerivativeMatrix # strong form derivative matrix - derivative_split ::DerivativeMatrix # strong form derivative matrix minus boundary terms - derivative_split_transpose::DerivativeMatrix # transpose of `derivative_split` - derivative_dhat ::DerivativeMatrix # weak form matrix "dhat", - # negative adjoint wrt the SBP dot product +For the special case `polydeg=0`, the DG method reduces to a finite volume method. +Therefore, this function sets the center point of the cell as the single node.
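+
+For example (an illustrative special case), `LobattoLegendreBasis(0)` uses the
+single node `0.0` with quadrature weight `2.0`, i.e., the midpoint rule on the
+reference interval `[-1, 1]`.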
+""" +struct LobattoLegendreBasis{RealT <: Real, NNODES, + VectorT <: AbstractVector{RealT}, + InverseVandermondeLegendre <: AbstractMatrix{RealT}, + BoundaryMatrix <: AbstractMatrix{RealT}, + DerivativeMatrix <: AbstractMatrix{RealT}} <: + AbstractBasisSBP{RealT} + nodes::VectorT + weights::VectorT + inverse_weights::VectorT + + inverse_vandermonde_legendre::InverseVandermondeLegendre + boundary_interpolation::BoundaryMatrix # lhat + + derivative_matrix::DerivativeMatrix # strong form derivative matrix + derivative_split::DerivativeMatrix # strong form derivative matrix minus boundary terms + derivative_split_transpose::DerivativeMatrix # transpose of `derivative_split` + derivative_dhat::DerivativeMatrix # weak form matrix "dhat", + # negative adjoint wrt the SBP dot product end function LobattoLegendreBasis(RealT, polydeg::Integer) - nnodes_ = polydeg + 1 - - # compute everything using `Float64` by default - nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) - inverse_weights_ = inv.(weights_) - - _, inverse_vandermonde_legendre_ = vandermonde_legendre(nodes_) - - boundary_interpolation_ = zeros(nnodes_, 2) - boundary_interpolation_[:, 1] = calc_lhat(-1.0, nodes_, weights_) - boundary_interpolation_[:, 2] = calc_lhat( 1.0, nodes_, weights_) - - derivative_matrix_ = polynomial_derivative_matrix(nodes_) - derivative_split_ = calc_dsplit(nodes_, weights_) - derivative_split_transpose_ = Matrix(derivative_split_') - derivative_dhat_ = calc_dhat(nodes_, weights_) - - # type conversions to get the requested real type and enable possible - # optimizations of runtime performance and latency - nodes = SVector{nnodes_, RealT}(nodes_) - weights = SVector{nnodes_, RealT}(weights_) - inverse_weights = SVector{nnodes_, RealT}(inverse_weights_) - - inverse_vandermonde_legendre = convert.(RealT, inverse_vandermonde_legendre_) - boundary_interpolation = convert.(RealT, boundary_interpolation_) - - # Usually as fast as `SMatrix` (when using `let` in the volume integral/`@threaded`) - derivative_matrix = Matrix{RealT}(derivative_matrix_) - derivative_split = Matrix{RealT}(derivative_split_) - derivative_split_transpose = Matrix{RealT}(derivative_split_transpose_) - derivative_dhat = Matrix{RealT}(derivative_dhat_) - - return LobattoLegendreBasis{RealT, nnodes_, typeof(nodes), typeof(inverse_vandermonde_legendre), typeof(boundary_interpolation), typeof(derivative_matrix)}( - nodes, weights, inverse_weights, - inverse_vandermonde_legendre, boundary_interpolation, - derivative_matrix, derivative_split, derivative_split_transpose, derivative_dhat - ) + nnodes_ = polydeg + 1 + + # compute everything using `Float64` by default + nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) + inverse_weights_ = inv.(weights_) + + _, inverse_vandermonde_legendre_ = vandermonde_legendre(nodes_) + + boundary_interpolation_ = zeros(nnodes_, 2) + boundary_interpolation_[:, 1] = calc_lhat(-1.0, nodes_, weights_) + boundary_interpolation_[:, 2] = calc_lhat(1.0, nodes_, weights_) + + derivative_matrix_ = polynomial_derivative_matrix(nodes_) + derivative_split_ = calc_dsplit(nodes_, weights_) + derivative_split_transpose_ = Matrix(derivative_split_') + derivative_dhat_ = calc_dhat(nodes_, weights_) + + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + nodes = SVector{nnodes_, RealT}(nodes_) + weights = SVector{nnodes_, RealT}(weights_) + inverse_weights = SVector{nnodes_, RealT}(inverse_weights_) + + inverse_vandermonde_legendre = convert.(RealT, 
inverse_vandermonde_legendre_) + boundary_interpolation = convert.(RealT, boundary_interpolation_) + + # Usually as fast as `SMatrix` (when using `let` in the volume integral/`@threaded`) + derivative_matrix = Matrix{RealT}(derivative_matrix_) + derivative_split = Matrix{RealT}(derivative_split_) + derivative_split_transpose = Matrix{RealT}(derivative_split_transpose_) + derivative_dhat = Matrix{RealT}(derivative_dhat_) + + return LobattoLegendreBasis{RealT, nnodes_, typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) end LobattoLegendreBasis(polydeg::Integer) = LobattoLegendreBasis(Float64, polydeg) function Base.show(io::IO, basis::LobattoLegendreBasis) - @nospecialize basis # reduce precompilation time + @nospecialize basis # reduce precompilation time - print(io, "LobattoLegendreBasis{", real(basis), "}(polydeg=", polydeg(basis), ")") + print(io, "LobattoLegendreBasis{", real(basis), "}(polydeg=", polydeg(basis), ")") end function Base.show(io::IO, ::MIME"text/plain", basis::LobattoLegendreBasis) - @nospecialize basis # reduce precompilation time + @nospecialize basis # reduce precompilation time - print(io, "LobattoLegendreBasis{", real(basis), "} with polynomials of degree ", polydeg(basis)) + print(io, "LobattoLegendreBasis{", real(basis), "} with polynomials of degree ", + polydeg(basis)) end function Base.:(==)(b1::LobattoLegendreBasis, b2::LobattoLegendreBasis) - if typeof(b1) != typeof(b2) - return false - end + if typeof(b1) != typeof(b2) + return false + end - for field in fieldnames(typeof(b1)) - if getfield(b1, field) != getfield(b2, field) - return false + for field in fieldnames(typeof(b1)) + if getfield(b1, field) != getfield(b2, field) + return false + end end - end - return true + return true end @inline Base.real(basis::LobattoLegendreBasis{RealT}) where {RealT} = RealT -@inline nnodes(basis::LobattoLegendreBasis{RealT, NNODES}) where {RealT, NNODES} = NNODES +@inline function nnodes(basis::LobattoLegendreBasis{RealT, NNODES}) where {RealT, NNODES + } + NNODES +end """ eachnode(basis::LobattoLegendreBasis) @@ -113,7 +127,6 @@ In particular, not the nodes themselves are returned. @inline get_nodes(basis::LobattoLegendreBasis) = basis.nodes - """ integrate(f, u, basis::LobattoLegendreBasis) @@ -121,13 +134,13 @@ Map the function `f` to the coefficients `u` and integrate with respect to the quadrature rule given by `basis`. """ function integrate(f, u, basis::LobattoLegendreBasis) - @unpack weights = basis + @unpack weights = basis - res = zero(f(first(u))) - for i in eachindex(u, weights) - res += f(u[i]) * weights[i] - end - return res + res = zero(f(first(u))) + for i in eachindex(u, weights) + res += f(u[i]) * weights[i] + end + return res end # Return the first/last weight of the quadrature associated with `basis`. 
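A minimal usage sketch of `integrate` (the variable names are illustrative; `u` is assumed to hold nodal values at `get_nodes(basis)`):

    basis = LobattoLegendreBasis(3)
    u = sin.(get_nodes(basis))      # nodal values on the reference interval [-1, 1]
    val = integrate(abs2, u, basis) # quadrature approximation of ∫ sin(x)^2 dx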
@@ -137,66 +150,71 @@ end left_boundary_weight(basis::LobattoLegendreBasis) = first(basis.weights) right_boundary_weight(basis::LobattoLegendreBasis) = last(basis.weights) - - -struct LobattoLegendreMortarL2{RealT<:Real, NNODES, ForwardMatrix<:AbstractMatrix{RealT}, ReverseMatrix<:AbstractMatrix{RealT}} <: AbstractMortarL2{RealT} - forward_upper::ForwardMatrix - forward_lower::ForwardMatrix - reverse_upper::ReverseMatrix - reverse_lower::ReverseMatrix +struct LobattoLegendreMortarL2{RealT <: Real, NNODES, + ForwardMatrix <: AbstractMatrix{RealT}, + ReverseMatrix <: AbstractMatrix{RealT}} <: + AbstractMortarL2{RealT} + forward_upper::ForwardMatrix + forward_lower::ForwardMatrix + reverse_upper::ReverseMatrix + reverse_lower::ReverseMatrix end function MortarL2(basis::LobattoLegendreBasis) - RealT = real(basis) - nnodes_ = nnodes(basis) - - # compute everything using `Float64` by default - forward_upper_ = calc_forward_upper(nnodes_) - forward_lower_ = calc_forward_lower(nnodes_) - reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) - reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) - - # type conversions to get the requested real type and enable possible - # optimizations of runtime performance and latency - - # Usually as fast as `SMatrix` but better for latency - forward_upper = Matrix{RealT}(forward_upper_) - forward_lower = Matrix{RealT}(forward_lower_) - - # TODO: Taal performance - # Check the performance of different implementations of `mortar_fluxes_to_elements!` - # with different types of the reverse matrices and different types of - # `fstar_upper_threaded` etc. used in the cache. - # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` can be faster than - # `@tullio` when the matrix sizes are not necessarily static. - # reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_) - # reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_) - reverse_upper = Matrix{RealT}(reverse_upper_) - reverse_lower = Matrix{RealT}(reverse_lower_) - - LobattoLegendreMortarL2{RealT, nnodes_, typeof(forward_upper), typeof(reverse_upper)}( - forward_upper, forward_lower, - reverse_upper, reverse_lower) + RealT = real(basis) + nnodes_ = nnodes(basis) + + # compute everything using `Float64` by default + forward_upper_ = calc_forward_upper(nnodes_) + forward_lower_ = calc_forward_lower(nnodes_) + reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) + reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) + + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + + # Usually as fast as `SMatrix` but better for latency + forward_upper = Matrix{RealT}(forward_upper_) + forward_lower = Matrix{RealT}(forward_lower_) + + # TODO: Taal performance + # Check the performance of different implementations of `mortar_fluxes_to_elements!` + # with different types of the reverse matrices and different types of + # `fstar_upper_threaded` etc. used in the cache. + # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` can be faster than + # `@tullio` when the matrix sizes are not necessarily static. 
+ # reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_) + # reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_) + reverse_upper = Matrix{RealT}(reverse_upper_) + reverse_lower = Matrix{RealT}(reverse_lower_) + + LobattoLegendreMortarL2{RealT, nnodes_, typeof(forward_upper), typeof(reverse_upper) + }(forward_upper, forward_lower, + reverse_upper, reverse_lower) end function Base.show(io::IO, mortar::LobattoLegendreMortarL2) - @nospecialize mortar # reduce precompilation time + @nospecialize mortar # reduce precompilation time - print(io, "LobattoLegendreMortarL2{", real(mortar), "}(polydeg=", polydeg(mortar), ")") + print(io, "LobattoLegendreMortarL2{", real(mortar), "}(polydeg=", polydeg(mortar), + ")") end function Base.show(io::IO, ::MIME"text/plain", mortar::LobattoLegendreMortarL2) - @nospecialize mortar # reduce precompilation time + @nospecialize mortar # reduce precompilation time - print(io, "LobattoLegendreMortarL2{", real(mortar), "} with polynomials of degree ", polydeg(mortar)) + print(io, "LobattoLegendreMortarL2{", real(mortar), "} with polynomials of degree ", + polydeg(mortar)) end @inline Base.real(mortar::LobattoLegendreMortarL2{RealT}) where {RealT} = RealT -@inline nnodes(mortar::LobattoLegendreMortarL2{RealT, NNODES}) where {RealT, NNODES} = NNODES +@inline function nnodes(mortar::LobattoLegendreMortarL2{RealT, NNODES}) where {RealT, + NNODES} + NNODES +end @inline polydeg(mortar::LobattoLegendreMortarL2) = nnodes(mortar) - 1 - # TODO: We can create EC mortars along the lines of the following implementation. # abstract type AbstractMortarEC{RealT} <: AbstractMortar{RealT} end @@ -230,49 +248,55 @@ end # @inline nnodes(mortar::LobattoLegendreMortarEC{RealT, NNODES}) = NNODES - - -struct LobattoLegendreAnalyzer{RealT<:Real, NNODES, - VectorT<:AbstractVector{RealT}, - Vandermonde<:AbstractMatrix{RealT}} <: SolutionAnalyzer{RealT} - nodes ::VectorT - weights::VectorT - vandermonde::Vandermonde +struct LobattoLegendreAnalyzer{RealT <: Real, NNODES, + VectorT <: AbstractVector{RealT}, + Vandermonde <: AbstractMatrix{RealT}} <: + SolutionAnalyzer{RealT} + nodes::VectorT + weights::VectorT + vandermonde::Vandermonde end -function SolutionAnalyzer(basis::LobattoLegendreBasis; analysis_polydeg=2*polydeg(basis)) - RealT = real(basis) - nnodes_ = analysis_polydeg + 1 +function SolutionAnalyzer(basis::LobattoLegendreBasis; + analysis_polydeg = 2 * polydeg(basis)) + RealT = real(basis) + nnodes_ = analysis_polydeg + 1 - # compute everything using `Float64` by default - nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) - vandermonde_ = polynomial_interpolation_matrix(get_nodes(basis), nodes_) + # compute everything using `Float64` by default + nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) + vandermonde_ = polynomial_interpolation_matrix(get_nodes(basis), nodes_) - # type conversions to get the requested real type and enable possible - # optimizations of runtime performance and latency - nodes = SVector{nnodes_, RealT}(nodes_) - weights = SVector{nnodes_, RealT}(weights_) + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + nodes = SVector{nnodes_, RealT}(nodes_) + weights = SVector{nnodes_, RealT}(weights_) - vandermonde = Matrix{RealT}(vandermonde_) + vandermonde = Matrix{RealT}(vandermonde_) - return LobattoLegendreAnalyzer{RealT, nnodes_, typeof(nodes), typeof(vandermonde)}( - nodes, weights, vandermonde) + return LobattoLegendreAnalyzer{RealT, 
nnodes_, typeof(nodes), typeof(vandermonde)}(nodes, + weights, + vandermonde) end function Base.show(io::IO, analyzer::LobattoLegendreAnalyzer) - @nospecialize analyzer # reduce precompilation time + @nospecialize analyzer # reduce precompilation time - print(io, "LobattoLegendreAnalyzer{", real(analyzer), "}(polydeg=", polydeg(analyzer), ")") + print(io, "LobattoLegendreAnalyzer{", real(analyzer), "}(polydeg=", + polydeg(analyzer), ")") end function Base.show(io::IO, ::MIME"text/plain", analyzer::LobattoLegendreAnalyzer) - @nospecialize analyzer # reduce precompilation time + @nospecialize analyzer # reduce precompilation time - print(io, "LobattoLegendreAnalyzer{", real(analyzer), "} with polynomials of degree ", polydeg(analyzer)) + print(io, "LobattoLegendreAnalyzer{", real(analyzer), + "} with polynomials of degree ", polydeg(analyzer)) end @inline Base.real(analyzer::LobattoLegendreAnalyzer{RealT}) where {RealT} = RealT -@inline nnodes(analyzer::LobattoLegendreAnalyzer{RealT, NNODES}) where {RealT, NNODES} = NNODES +@inline function nnodes(analyzer::LobattoLegendreAnalyzer{RealT, NNODES}) where {RealT, + NNODES} + NNODES +end """ eachnode(analyzer::LobattoLegendreAnalyzer) @@ -284,67 +308,72 @@ In particular, not the nodes themselves are returned. @inline polydeg(analyzer::LobattoLegendreAnalyzer) = nnodes(analyzer) - 1 - - -struct LobattoLegendreAdaptorL2{RealT<:Real, NNODES, ForwardMatrix<:AbstractMatrix{RealT}, ReverseMatrix<:AbstractMatrix{RealT}} <: AdaptorL2{RealT} - forward_upper::ForwardMatrix - forward_lower::ForwardMatrix - reverse_upper::ReverseMatrix - reverse_lower::ReverseMatrix +struct LobattoLegendreAdaptorL2{RealT <: Real, NNODES, + ForwardMatrix <: AbstractMatrix{RealT}, + ReverseMatrix <: AbstractMatrix{RealT}} <: + AdaptorL2{RealT} + forward_upper::ForwardMatrix + forward_lower::ForwardMatrix + reverse_upper::ReverseMatrix + reverse_lower::ReverseMatrix end function AdaptorL2(basis::LobattoLegendreBasis{RealT}) where {RealT} - nnodes_ = nnodes(basis) - - # compute everything using `Float64` by default - forward_upper_ = calc_forward_upper(nnodes_) - forward_lower_ = calc_forward_lower(nnodes_) - reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) - reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) - - # type conversions to get the requested real type and enable possible - # optimizations of runtime performance and latency - - # TODO: Taal performance - # Check the performance of different implementations of - # `refine_elements!` (forward) and `coarsen_elements!` (reverse) - # with different types of the matrices. - # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` - # can be faster than `@tullio` when the matrix sizes are not necessarily - # static. 
- forward_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_upper_) - forward_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_lower_) - # forward_upper = Matrix{RealT}(forward_upper_) - # forward_lower = Matrix{RealT}(forward_lower_) - - reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_) - reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_) - # reverse_upper = Matrix{RealT}(reverse_upper_) - # reverse_lower = Matrix{RealT}(reverse_lower_) - - LobattoLegendreAdaptorL2{RealT, nnodes_, typeof(forward_upper), typeof(reverse_upper)}( - forward_upper, forward_lower, - reverse_upper, reverse_lower) + nnodes_ = nnodes(basis) + + # compute everything using `Float64` by default + forward_upper_ = calc_forward_upper(nnodes_) + forward_lower_ = calc_forward_lower(nnodes_) + reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) + reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) + + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + + # TODO: Taal performance + # Check the performance of different implementations of + # `refine_elements!` (forward) and `coarsen_elements!` (reverse) + # with different types of the matrices. + # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` + # can be faster than `@tullio` when the matrix sizes are not necessarily + # static. + forward_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_upper_) + forward_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_lower_) + # forward_upper = Matrix{RealT}(forward_upper_) + # forward_lower = Matrix{RealT}(forward_lower_) + + reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_) + reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_) + # reverse_upper = Matrix{RealT}(reverse_upper_) + # reverse_lower = Matrix{RealT}(reverse_lower_) + + LobattoLegendreAdaptorL2{RealT, nnodes_, typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) end function Base.show(io::IO, adaptor::LobattoLegendreAdaptorL2) - @nospecialize adaptor # reduce precompilation time + @nospecialize adaptor # reduce precompilation time - print(io, "LobattoLegendreAdaptorL2{", real(adaptor), "}(polydeg=", polydeg(adaptor), ")") + print(io, "LobattoLegendreAdaptorL2{", real(adaptor), "}(polydeg=", + polydeg(adaptor), ")") end function Base.show(io::IO, ::MIME"text/plain", adaptor::LobattoLegendreAdaptorL2) - @nospecialize adaptor # reduce precompilation time + @nospecialize adaptor # reduce precompilation time - print(io, "LobattoLegendreAdaptorL2{", real(adaptor), "} with polynomials of degree ", polydeg(adaptor)) + print(io, "LobattoLegendreAdaptorL2{", real(adaptor), + "} with polynomials of degree ", polydeg(adaptor)) end @inline Base.real(adaptor::LobattoLegendreAdaptorL2{RealT}) where {RealT} = RealT -@inline nnodes(adaptor::LobattoLegendreAdaptorL2{RealT, NNODES}) where {RealT, NNODES} = NNODES +@inline function nnodes(adaptor::LobattoLegendreAdaptorL2{RealT, NNODES}) where {RealT, + NNODES} + NNODES +end @inline polydeg(adaptor::LobattoLegendreAdaptorL2) = nnodes(adaptor) - 1 - ############################################################################### # Polynomial derivative and interpolation functions @@ -352,342 +381,337 @@ end # Calculate the Dhat matrix function calc_dhat(nodes, weights) - n_nodes = length(nodes) - dhat = Matrix(polynomial_derivative_matrix(nodes)') 
+ n_nodes = length(nodes) + dhat = Matrix(polynomial_derivative_matrix(nodes)') - for n in 1:n_nodes, j in 1:n_nodes - dhat[j, n] *= -weights[n] / weights[j] - end + for n in 1:n_nodes, j in 1:n_nodes + dhat[j, n] *= -weights[n] / weights[j] + end - return dhat + return dhat end - # Calculate the Dsplit matrix for split-form differentiation: dsplit = 2D - M⁻¹B function calc_dsplit(nodes, weights) - # Start with 2 x the normal D matrix - dsplit = 2 .* polynomial_derivative_matrix(nodes) + # Start with 2 x the normal D matrix + dsplit = 2 .* polynomial_derivative_matrix(nodes) - # Modify to account for - dsplit[ 1, 1] += 1 / weights[1] - dsplit[end, end] -= 1 / weights[end] + # Modify to account for the boundary terms (the M⁻¹B part) + dsplit[1, 1] += 1 / weights[1] + dsplit[end, end] -= 1 / weights[end] - return dsplit + return dsplit end - # Calculate the polynomial derivative matrix D function polynomial_derivative_matrix(nodes) - n_nodes = length(nodes) - d = zeros(n_nodes, n_nodes) - wbary = barycentric_weights(nodes) - - for i in 1:n_nodes, j in 1:n_nodes - if j != i - d[i, j] = wbary[j] / wbary[i] * 1 / (nodes[i] - nodes[j]) - d[i, i] -= d[i, j] + n_nodes = length(nodes) + d = zeros(n_nodes, n_nodes) + wbary = barycentric_weights(nodes) + + for i in 1:n_nodes, j in 1:n_nodes + if j != i + d[i, j] = wbary[j] / wbary[i] * 1 / (nodes[i] - nodes[j]) + d[i, i] -= d[i, j] + end end - end - return d + return d end - # Calculate an interpolation matrix (Vandermonde matrix) between two given sets of nodes function polynomial_interpolation_matrix(nodes_in, nodes_out, - baryweights_in=barycentric_weights(nodes_in)) - n_nodes_in = length(nodes_in) - n_nodes_out = length(nodes_out) - vandermonde = Matrix{promote_type(eltype(nodes_in), eltype(nodes_out))}(undef, - n_nodes_out, n_nodes_in) - polynomial_interpolation_matrix!(vandermonde, nodes_in, nodes_out, baryweights_in) - - return vandermonde + baryweights_in = barycentric_weights(nodes_in)) + n_nodes_in = length(nodes_in) + n_nodes_out = length(nodes_out) + vandermonde = Matrix{promote_type(eltype(nodes_in), eltype(nodes_out))}(undef, + n_nodes_out, + n_nodes_in) + polynomial_interpolation_matrix!(vandermonde, nodes_in, nodes_out, baryweights_in) + + return vandermonde end function polynomial_interpolation_matrix!(vandermonde, nodes_in, nodes_out, baryweights_in) - fill!(vandermonde, zero(eltype(vandermonde))) - - for k in eachindex(nodes_out) - match = false - for j in eachindex(nodes_in) - if isapprox(nodes_out[k], nodes_in[j]) - match = true - vandermonde[k, j] = 1 - end - end + fill!(vandermonde, zero(eltype(vandermonde))) + + for k in eachindex(nodes_out) + match = false + for j in eachindex(nodes_in) + if isapprox(nodes_out[k], nodes_in[j]) + match = true + vandermonde[k, j] = 1 + end + end - if match == false - s = zero(eltype(vandermonde)) - for j in eachindex(nodes_in) - t = baryweights_in[j] / (nodes_out[k] - nodes_in[j]) - vandermonde[k, j] = t - s += t - end - for j in eachindex(nodes_in) - vandermonde[k, j] = vandermonde[k, j] / s - end + if match == false + s = zero(eltype(vandermonde)) + for j in eachindex(nodes_in) + t = baryweights_in[j] / (nodes_out[k] - nodes_in[j]) + vandermonde[k, j] = t + s += t + end + for j in eachindex(nodes_in) + vandermonde[k, j] = vandermonde[k, j] / s + end + end end - end - return vandermonde + return vandermonde end - # Calculate the barycentric weights for a given node distribution.
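# As a formula sketch: w_j = 1 / prod_{k != j} (nodes[j] - nodes[k]); the first loop
# below accumulates the pairwise differences and the second loop inverts them.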
function barycentric_weights(nodes) - n_nodes = length(nodes) - weights = ones(n_nodes) + n_nodes = length(nodes) + weights = ones(n_nodes) - for j = 2:n_nodes, k = 1:(j-1) - weights[k] *= nodes[k] - nodes[j] - weights[j] *= nodes[j] - nodes[k] - end + for j in 2:n_nodes, k in 1:(j - 1) + weights[k] *= nodes[k] - nodes[j] + weights[j] *= nodes[j] - nodes[k] + end - for j in 1:n_nodes - weights[j] = 1 / weights[j] - end + for j in 1:n_nodes + weights[j] = 1 / weights[j] + end - return weights + return weights end - # Calculate Lhat. function calc_lhat(x, nodes, weights) - n_nodes = length(nodes) - wbary = barycentric_weights(nodes) + n_nodes = length(nodes) + wbary = barycentric_weights(nodes) - lhat = lagrange_interpolating_polynomials(x, nodes, wbary) + lhat = lagrange_interpolating_polynomials(x, nodes, wbary) - for i in 1:n_nodes - lhat[i] /= weights[i] - end + for i in 1:n_nodes + lhat[i] /= weights[i] + end - return lhat + return lhat end - # Calculate Lagrange polynomials for a given node distribution. function lagrange_interpolating_polynomials(x, nodes, wbary) - n_nodes = length(nodes) - polynomials = zeros(n_nodes) + n_nodes = length(nodes) + polynomials = zeros(n_nodes) - for i in 1:n_nodes - if isapprox(x, nodes[i], rtol=eps(x)) - polynomials[i] = 1 - return polynomials + for i in 1:n_nodes + if isapprox(x, nodes[i], rtol = eps(x)) + polynomials[i] = 1 + return polynomials + end end - end - for i in 1:n_nodes - polynomials[i] = wbary[i] / (x - nodes[i]) - end - total = sum(polynomials) + for i in 1:n_nodes + polynomials[i] = wbary[i] / (x - nodes[i]) + end + total = sum(polynomials) - for i in 1:n_nodes - polynomials[i] /= total - end + for i in 1:n_nodes + polynomials[i] /= total + end - return polynomials + return polynomials end - # From FLUXO (but really from blue book by Kopriva) function gauss_lobatto_nodes_weights(n_nodes::Integer) - # From Kopriva's book - n_iterations = 10 - tolerance = 1e-15 - - # Initialize output - nodes = zeros(n_nodes) - weights = zeros(n_nodes) - - # Get polynomial degree for convenience - N = n_nodes - 1 - - # Calculate values at boundary - nodes[1] = -1.0 - nodes[end] = 1.0 - weights[1] = 2 / (N * (N + 1)) - weights[end] = weights[1] - - # Calculate interior values - if N > 1 - cont1 = pi/N - cont2 = 3/(8 * N * pi) - - # Use symmetry -> only left side is computed - for i in 1:(div(N + 1, 2) - 1) - # Calculate node - # Initial guess for Newton method - nodes[i+1] = -cos(cont1*(i+0.25) - cont2/(i+0.25)) - - # Newton iteration to find root of Legendre polynomial (= integration node) - for k in 0:n_iterations - q, qder, _ = calc_q_and_l(N, nodes[i+1]) - dx = -q/qder - nodes[i+1] += dx - if abs(dx) < tolerance * abs(nodes[i+1]) - break - end - end - - # Calculate weight - _, _, L = calc_q_and_l(N, nodes[i+1]) - weights[i+1] = weights[1] / L^2 + # From Kopriva's book + n_iterations = 10 + tolerance = 1e-15 + + # Initialize output + nodes = zeros(n_nodes) + weights = zeros(n_nodes) + + # Special case for polynomial degree zero (first order finite volume) + if n_nodes == 1 + nodes[1] = 0 + weights[1] = 2 + return nodes, weights + end - # Set nodes and weights according to symmetry properties - nodes[N+1-i] = -nodes[i+1] - weights[N+1-i] = weights[i+1] + # Get polynomial degree for convenience + N = n_nodes - 1 + + # Calculate values at boundary + nodes[1] = -1.0 + nodes[end] = 1.0 + weights[1] = 2 / (N * (N + 1)) + weights[end] = weights[1] + + # Calculate interior values + if N > 1 + cont1 = pi / N + cont2 = 3 / (8 * N * pi) + + # Use symmetry -> only 
left side is computed + for i in 1:(div(N + 1, 2) - 1) + # Calculate node + # Initial guess for Newton method + nodes[i + 1] = -cos(cont1 * (i + 0.25) - cont2 / (i + 0.25)) + + # Newton iteration to find root of Legendre polynomial (= integration node) + for k in 0:n_iterations + q, qder, _ = calc_q_and_l(N, nodes[i + 1]) + dx = -q / qder + nodes[i + 1] += dx + if abs(dx) < tolerance * abs(nodes[i + 1]) + break + end + end + + # Calculate weight + _, _, L = calc_q_and_l(N, nodes[i + 1]) + weights[i + 1] = weights[1] / L^2 + + # Set nodes and weights according to symmetry properties + nodes[N + 1 - i] = -nodes[i + 1] + weights[N + 1 - i] = weights[i + 1] + end end - end - # If odd number of nodes, set center node to origin (= 0.0) and calculate weight - if n_nodes % 2 == 1 - _, _, L = calc_q_and_l(N, 0) - nodes[div(N, 2) + 1] = 0.0 - weights[div(N, 2) + 1] = weights[1] / L^2 - end + # If odd number of nodes, set center node to origin (= 0.0) and calculate weight + if n_nodes % 2 == 1 + _, _, L = calc_q_and_l(N, 0) + nodes[div(N, 2) + 1] = 0.0 + weights[div(N, 2) + 1] = weights[1] / L^2 + end - return nodes, weights + return nodes, weights end - # From FLUXO (but really from blue book by Kopriva) function calc_q_and_l(N::Integer, x::Float64) - L_Nm2 = 1.0 - L_Nm1 = x - Lder_Nm2 = 0.0 - Lder_Nm1 = 1.0 - - local L - for i in 2:N - L = ((2 * i - 1) * x * L_Nm1 - (i - 1) * L_Nm2) / i - Lder = Lder_Nm2 + (2 * i - 1) * L_Nm1 - L_Nm2 = L_Nm1 - L_Nm1 = L - Lder_Nm2 = Lder_Nm1 - Lder_Nm1 = Lder - end - - q = (2 * N + 1)/(N + 1) * (x * L - L_Nm2) - qder = (2 * N + 1) * L - - return q, qder, L + L_Nm2 = 1.0 + L_Nm1 = x + Lder_Nm2 = 0.0 + Lder_Nm1 = 1.0 + + local L + for i in 2:N + L = ((2 * i - 1) * x * L_Nm1 - (i - 1) * L_Nm2) / i + Lder = Lder_Nm2 + (2 * i - 1) * L_Nm1 + L_Nm2 = L_Nm1 + L_Nm1 = L + Lder_Nm2 = Lder_Nm1 + Lder_Nm1 = Lder + end + + q = (2 * N + 1) / (N + 1) * (x * L - L_Nm2) + qder = (2 * N + 1) * L + + return q, qder, L end calc_q_and_l(N::Integer, x::Real) = calc_q_and_l(N, convert(Float64, x)) - # From FLUXO (but really from blue book by Kopriva) function gauss_nodes_weights(n_nodes::Integer) - # From Kopriva's book - n_iterations = 10 - tolerance = 1e-15 - - # Initialize output - nodes = ones(n_nodes) * 1000 - weights = zeros(n_nodes) - - # Get polynomial degree for convenience - N = n_nodes - 1 - if N == 0 - nodes .= 0.0 - weights .= 2.0 - return nodes, weights - elseif N == 1 - nodes[1] = -sqrt(1/3) - nodes[end] = -nodes[1] - weights .= 1.0 - return nodes, weights - else # N > 1 - # Use symmetry property of the roots of the Legendre polynomials - for i in 0:(div(N + 1, 2) - 1) - # Starting guess for Newton method - nodes[i+1] = -cos(pi / (2 * N + 2) * (2 * i + 1)) - - # Newton iteration to find root of Legendre polynomial (= integration node) - for k in 0:n_iterations - poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i+1]) - dx = -poly / deriv - nodes[i+1] += dx - if abs(dx) < tolerance * abs(nodes[i+1]) - break + # From Kopriva's book + n_iterations = 10 + tolerance = 1e-15 + + # Initialize output + nodes = ones(n_nodes) * 1000 + weights = zeros(n_nodes) + + # Get polynomial degree for convenience + N = n_nodes - 1 + if N == 0 + nodes .= 0.0 + weights .= 2.0 + return nodes, weights + elseif N == 1 + nodes[1] = -sqrt(1 / 3) + nodes[end] = -nodes[1] + weights .= 1.0 + return nodes, weights + else # N > 1 + # Use symmetry property of the roots of the Legendre polynomials + for i in 0:(div(N + 1, 2) - 1) + # Starting guess for Newton method + nodes[i + 1] = -cos(pi / (2 
* N + 2) * (2 * i + 1)) + + # Newton iteration to find root of Legendre polynomial (= integration node) + for k in 0:n_iterations + poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i + 1]) + dx = -poly / deriv + nodes[i + 1] += dx + if abs(dx) < tolerance * abs(nodes[i + 1]) + break + end + end + + # Calculate weight + poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i + 1]) + weights[i + 1] = (2 * N + 3) / ((1 - nodes[i + 1]^2) * deriv^2) + + # Set nodes and weights according to symmetry properties + nodes[N + 1 - i] = -nodes[i + 1] + weights[N + 1 - i] = weights[i + 1] end - end - # Calculate weight - poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i+1]) - weights[i+1] = (2 * N + 3) / ((1 - nodes[i+1]^2) * deriv^2) - - # Set nodes and weights according to symmetry properties - nodes[N+1-i] = -nodes[i+1] - weights[N+1-i] = weights[i+1] - end + # If odd number of nodes, set center node to origin (= 0.0) and calculate weight + if n_nodes % 2 == 1 + poly, deriv = legendre_polynomial_and_derivative(N + 1, 0.0) + nodes[div(N, 2) + 1] = 0.0 + weights[div(N, 2) + 1] = (2 * N + 3) / deriv^2 + end - # If odd number of nodes, set center node to origin (= 0.0) and calculate weight - if n_nodes % 2 == 1 - poly, deriv = legendre_polynomial_and_derivative(N + 1, 0.0) - nodes[div(N, 2) + 1] = 0.0 - weights[div(N, 2) + 1] = (2 * N + 3) / deriv^2 + return nodes, weights end - - return nodes, weights - end end - # From FLUXO (but really from blue book by Kopriva) function legendre_polynomial_and_derivative(N::Int, x::Real) - if N == 0 - poly = 1.0 - deriv = 0.0 - elseif N == 1 - poly = convert(Float64, x) - deriv = 1.0 - else - poly_Nm2 = 1.0 - poly_Nm1 = convert(Float64, x) - deriv_Nm2 = 0.0 - deriv_Nm1 = 1.0 - - poly = 0.0 - deriv = 0.0 - for i in 2:N - poly = ((2*i-1) * x * poly_Nm1 - (i-1) * poly_Nm2) / i - deriv=deriv_Nm2 + (2*i-1)*poly_Nm1 - poly_Nm2=poly_Nm1 - poly_Nm1=poly - deriv_Nm2=deriv_Nm1 - deriv_Nm1=deriv + if N == 0 + poly = 1.0 + deriv = 0.0 + elseif N == 1 + poly = convert(Float64, x) + deriv = 1.0 + else + poly_Nm2 = 1.0 + poly_Nm1 = convert(Float64, x) + deriv_Nm2 = 0.0 + deriv_Nm1 = 1.0 + + poly = 0.0 + deriv = 0.0 + for i in 2:N + poly = ((2 * i - 1) * x * poly_Nm1 - (i - 1) * poly_Nm2) / i + deriv = deriv_Nm2 + (2 * i - 1) * poly_Nm1 + poly_Nm2 = poly_Nm1 + poly_Nm1 = poly + deriv_Nm2 = deriv_Nm1 + deriv_Nm1 = deriv + end end - end - # Normalize - poly = poly * sqrt(N+0.5) - deriv = deriv * sqrt(N+0.5) + # Normalize + poly = poly * sqrt(N + 0.5) + deriv = deriv * sqrt(N + 0.5) - return poly, deriv + return poly, deriv end - # Calculate Legendre vandermonde matrix and its inverse function vandermonde_legendre(nodes, N) - n_nodes = length(nodes) - n_modes = N + 1 - vandermonde = zeros(n_nodes, n_modes) + n_nodes = length(nodes) + n_modes = N + 1 + vandermonde = zeros(n_nodes, n_modes) - for i in 1:n_nodes - for m in 1:n_modes - vandermonde[i, m], _ = legendre_polynomial_and_derivative(m-1, nodes[i]) + for i in 1:n_nodes + for m in 1:n_modes + vandermonde[i, m], _ = legendre_polynomial_and_derivative(m - 1, nodes[i]) + end end - end - # for very high polynomial degree, this is not well conditioned - inverse_vandermonde = inv(vandermonde) - return vandermonde, inverse_vandermonde + # for very high polynomial degree, this is not well conditioned + inverse_vandermonde = inv(vandermonde) + return vandermonde, inverse_vandermonde end vandermonde_legendre(nodes) = vandermonde_legendre(nodes, length(nodes) - 1) - - end # @muladd diff --git 
a/src/solvers/dgsem/dgsem.jl b/src/solvers/dgsem/dgsem.jl index 0e81fdb7bde..27caad4d2dc 100644 --- a/src/solvers/dgsem/dgsem.jl +++ b/src/solvers/dgsem/dgsem.jl @@ -3,14 +3,13 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Include utilities include("interpolation.jl") include("l2projection.jl") include("basis_lobatto_legendre.jl") - """ DGSEM(; RealT=Float64, polydeg::Integer, surface_flux=flux_central, @@ -21,57 +20,55 @@ include("basis_lobatto_legendre.jl") Create a discontinuous Galerkin spectral element method (DGSEM) using a [`LobattoLegendreBasis`](@ref) with polynomials of degree `polydeg`. """ -const DGSEM = DG{Basis} where {Basis<:LobattoLegendreBasis} +const DGSEM = DG{Basis} where {Basis <: LobattoLegendreBasis} # TODO: Deprecated in v0.3 (no longer documented) function DGSEM(basis::LobattoLegendreBasis, - surface_flux=flux_central, - volume_integral=VolumeIntegralWeakForm(), - mortar=MortarL2(basis)) - - surface_integral = SurfaceIntegralWeakForm(surface_flux) - return DG{typeof(basis), typeof(mortar), typeof(surface_integral), typeof(volume_integral)}( - basis, mortar, surface_integral, volume_integral) + surface_flux = flux_central, + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2(basis)) + surface_integral = SurfaceIntegralWeakForm(surface_flux) + return DG{typeof(basis), typeof(mortar), typeof(surface_integral), + typeof(volume_integral)}(basis, mortar, surface_integral, volume_integral) end # TODO: Deprecated in v0.3 (no longer documented) function DGSEM(basis::LobattoLegendreBasis, surface_integral::AbstractSurfaceIntegral, - volume_integral=VolumeIntegralWeakForm(), - mortar=MortarL2(basis)) - - return DG{typeof(basis), typeof(mortar), typeof(surface_integral), typeof(volume_integral)}( - basis, mortar, surface_integral, volume_integral) + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2(basis)) + return DG{typeof(basis), typeof(mortar), typeof(surface_integral), + typeof(volume_integral)}(basis, mortar, surface_integral, volume_integral) end # TODO: Deprecated in v0.3 (no longer documented) function DGSEM(RealT, polydeg::Integer, - surface_flux=flux_central, - volume_integral=VolumeIntegralWeakForm(), - mortar=MortarL2(LobattoLegendreBasis(RealT, polydeg))) - basis = LobattoLegendreBasis(RealT, polydeg) + surface_flux = flux_central, + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2(LobattoLegendreBasis(RealT, polydeg))) + basis = LobattoLegendreBasis(RealT, polydeg) - return DGSEM(basis, surface_flux, volume_integral, mortar) + return DGSEM(basis, surface_flux, volume_integral, mortar) end -DGSEM(polydeg, surface_flux=flux_central, volume_integral=VolumeIntegralWeakForm()) = DGSEM(Float64, polydeg, surface_flux, volume_integral) +function DGSEM(polydeg, surface_flux = flux_central, + volume_integral = VolumeIntegralWeakForm()) + DGSEM(Float64, polydeg, surface_flux, volume_integral) +end # The constructor using only keyword arguments is convenient for elixirs since # it allows one to modify the polynomial degree and other parameters via # `trixi_include`.
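# For example (an illustrative sketch; `flux_lax_friedrichs` is one of the available
# surface fluxes and `elixir_path` is a placeholder):
#     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
#     trixi_include(elixir_path, polydeg = 4)  # overrides `polydeg` set in the elixir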
-function DGSEM(; RealT=Float64, - polydeg::Integer, - surface_flux=flux_central, - surface_integral=SurfaceIntegralWeakForm(surface_flux), - volume_integral=VolumeIntegralWeakForm()) - basis = LobattoLegendreBasis(RealT, polydeg) - return DGSEM(basis, surface_integral, volume_integral) +function DGSEM(; RealT = Float64, + polydeg::Integer, + surface_flux = flux_central, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralWeakForm()) + basis = LobattoLegendreBasis(RealT, polydeg) + return DGSEM(basis, surface_integral, volume_integral) end @inline polydeg(dg::DGSEM) = polydeg(dg.basis) Base.summary(io::IO, dg::DGSEM) = print(io, "DGSEM(polydeg=$(polydeg(dg)))") - - - end # @muladd diff --git a/src/solvers/dgsem/interpolation.jl b/src/solvers/dgsem/interpolation.jl index bf54d518ee2..3f8f61c072f 100644 --- a/src/solvers/dgsem/interpolation.jl +++ b/src/solvers/dgsem/interpolation.jl @@ -2,55 +2,61 @@ # Naive implementations of multiply_dimensionwise used to demonstrate the functionality # without performance optimizations and for testing correctness of the optimized versions # implemented below. -function multiply_dimensionwise_naive(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 2}) - size_out = size(matrix, 1) - size_in = size(matrix, 2) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) - - for i in 1:size_out - for ii in 1:size_in - for v in 1:n_vars - data_out[v, i] += matrix[i, ii] * data_in[v, ii] - end +function multiply_dimensionwise_naive(matrix::AbstractMatrix, + data_in::AbstractArray{<:Any, 2}) + size_out = size(matrix, 1) + size_in = size(matrix, 2) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) + + for i in 1:size_out + for ii in 1:size_in + for v in 1:n_vars + data_out[v, i] += matrix[i, ii] * data_in[v, ii] + end + end end - end - return data_out + return data_out end -function multiply_dimensionwise_naive(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 3}) - size_out = size(matrix, 1) - size_in = size(matrix, 2) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out) - - for j in 1:size_out, i in 1:size_out - for jj in 1:size_in, ii in 1:size_in - for v in 1:n_vars - data_out[v, i, j] += matrix[i, ii] * matrix[j, jj] * data_in[v, ii, jj] - end +function multiply_dimensionwise_naive(matrix::AbstractMatrix, + data_in::AbstractArray{<:Any, 3}) + size_out = size(matrix, 1) + size_in = size(matrix, 2) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out) + + for j in 1:size_out, i in 1:size_out + for jj in 1:size_in, ii in 1:size_in + for v in 1:n_vars + data_out[v, i, j] += matrix[i, ii] * matrix[j, jj] * data_in[v, ii, jj] + end + end end - end - return data_out + return data_out end -function multiply_dimensionwise_naive(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 4}) - size_out = size(matrix, 1) - size_in = size(matrix, 2) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out, size_out) - - for k in 1:size_out, j in 1:size_out, i in 1:size_out - for kk in 1:size_in, jj in 1:size_in, ii in 1:size_in - for v in 1:n_vars - data_out[v, i, j, k] += matrix[i, ii] * matrix[j, jj] * matrix[k, kk] * data_in[v, ii, jj, kk] - end +function multiply_dimensionwise_naive(matrix::AbstractMatrix, + 
data_in::AbstractArray{<:Any, 4}) + size_out = size(matrix, 1) + size_in = size(matrix, 2) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out, size_out) + + for k in 1:size_out, j in 1:size_out, i in 1:size_out + for kk in 1:size_in, jj in 1:size_in, ii in 1:size_in + for v in 1:n_vars + data_out[v, i, j, k] += matrix[i, ii] * matrix[j, jj] * matrix[k, kk] * + data_in[v, ii, jj, kk] + end + end end - end - return data_out + return data_out end """ @@ -61,42 +67,43 @@ is assumed to have the first coordinate for the number of variables and the rema are multiplied by `matrix`. """ function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 2}) - # 1D - # optimized version of multiply_dimensionwise_naive - size_out = size(matrix, 1) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) + # 1D + # optimized version of multiply_dimensionwise_naive + size_out = size(matrix, 1) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) - multiply_dimensionwise!(data_out, matrix, data_in) + multiply_dimensionwise!(data_out, matrix, data_in) - return data_out + return data_out end function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 3}) - # 2D - # optimized version of multiply_dimensionwise_naive - size_out = size(matrix, 1) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out) + # 2D + # optimized version of multiply_dimensionwise_naive + size_out = size(matrix, 1) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out) - multiply_dimensionwise!(data_out, matrix, data_in) + multiply_dimensionwise!(data_out, matrix, data_in) - return data_out + return data_out end function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 4}) - # 3D - # optimized version of multiply_dimensionwise_naive - size_out = size(matrix, 1) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out, size_out) + # 3D + # optimized version of multiply_dimensionwise_naive + size_out = size(matrix, 1) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out, size_out) - multiply_dimensionwise!(data_out, matrix, data_in) + multiply_dimensionwise!(data_out, matrix, data_in) - return data_out + return data_out end - # In the following, there are several optimized in-place versions of multiply_dimensionwise. # These may make use of advanced optimization features such as the macro `@tullio` from Tullio.jl, # which basically uses an Einstein summation convention syntax. 
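A concrete sketch of what the 2D version computes (shapes follow from the signatures above; `A` and the random data are illustrative):

    A = rand(5, 4)                                # map from 4 to 5 nodes per direction
    data_in = rand(3, 4, 4)                       # 3 variables on a 4 × 4 tensor-product grid
    data_out = multiply_dimensionwise(A, data_in) # size (3, 5, 5)
    # entrywise: data_out[v, i, j] = Σ_{ii, jj} A[i, ii] * A[j, jj] * data_in[v, ii, jj]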
@@ -106,17 +113,17 @@ end # 1D version function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix::AbstractMatrix, - data_in ::AbstractArray{<:Any, 2}) - # @tullio threads=false data_out[v, i] = matrix[i, ii] * data_in[v, ii] - @turbo for i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[v, ii] + data_in::AbstractArray{<:Any, 2}) + # @tullio threads=false data_out[v, i] = matrix[i, ii] * data_in[v, ii] + @turbo for i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[v, ii] + end + data_out[v, i] = res end - data_out[v, i] = res - end - return nothing + return nothing end # 1D version for scalars @@ -124,73 +131,74 @@ end # of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`. function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 1}, matrix::AbstractMatrix, - data_in ::AbstractArray{<:Any, 1}) - # @tullio threads=false data_out[i] = matrix[i, ii] * data_in[ii] - @turbo for i in axes(data_out, 1) - res = zero(eltype(data_out)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[ii] + data_in::AbstractArray{<:Any, 1}) + # @tullio threads=false data_out[i] = matrix[i, ii] * data_in[ii] + @turbo for i in axes(data_out, 1) + res = zero(eltype(data_out)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[ii] + end + data_out[i] = res end - data_out[i] = res - end - return nothing + return nothing end # 1D version, apply matrixJ to data_inJ function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix1::AbstractMatrix, data_in1::AbstractArray{<:Any, 2}, matrix2::AbstractMatrix, data_in2::AbstractArray{<:Any, 2}) - # @tullio threads=false data_out[v, i] = matrix1[i, ii] * data_in1[v, ii] + matrix2[i, ii] * data_in2[v, ii] - # TODO: LoopVectorization upgrade - # We would like to use `@turbo` for the outermost loop possibly fuse both inner - # loops, but that does currently not work because of limitations of - # LoopVectorizationjl. However, Chris Elrod is planning to address this in - # the future, cf. https://github.com/JuliaSIMD/LoopVectorization.jl/issues/230#issuecomment-810632972 - @turbo for i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * data_in1[v, ii] + # @tullio threads=false data_out[v, i] = matrix1[i, ii] * data_in1[v, ii] + matrix2[i, ii] * data_in2[v, ii] + # TODO: LoopVectorization upgrade + # We would like to use `@turbo` for the outermost loop and possibly fuse both inner + # loops, but that currently does not work because of limitations of + # LoopVectorization.jl. However, Chris Elrod is planning to address this in + # the future, cf.
https://github.com/JuliaSIMD/LoopVectorization.jl/issues/230#issuecomment-810632972 + @turbo for i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in1[v, ii] + end + data_out[v, i] = res end - data_out[v, i] = res - end - @turbo for i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for ii in axes(matrix2, 2) - res += matrix2[i, ii] * data_in2[v, ii] + @turbo for i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for ii in axes(matrix2, 2) + res += matrix2[i, ii] * data_in2[v, ii] + end + data_out[v, i] += res end - data_out[v, i] += res - end - return nothing + return nothing end # 2D version function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j] = matrix[i, ii] * data_in[v, ii, j] - @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[v, ii, j] + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix, 1), size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j] = matrix[i, ii] * data_in[v, ii, j] + @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[v, ii, j] + end + tmp1[v, i, j] = res end - tmp1[v, i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[v, i, j] = matrix[j, jj] * tmp1[v, i, jj] - @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[v, i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[v, i, j] = matrix[j, jj] * tmp1[v, i, jj] + @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[v, i, jj] + end + data_out[v, i, j] = res end - data_out[v, i, j] = res - end - return nothing + return nothing end # 2D version for scalars @@ -198,246 +206,284 @@ end # of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`. 
function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 2}, - tmp1=zeros(eltype(data_out), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[i, j] = matrix[i, ii] * data_in[ii, j] - @turbo for j in axes(tmp1, 2), i in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[ii, j] + data_in::AbstractArray{<:Any, 2}, + tmp1 = zeros(eltype(data_out), size(matrix, 1), + size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[i, j] = matrix[i, ii] * data_in[ii, j] + @turbo for j in axes(tmp1, 2), i in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[ii, j] + end + tmp1[i, j] = res end - tmp1[i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[i, j] = matrix[j, jj] * tmp1[i, jj] - @turbo for j in axes(data_out, 2), i in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[i, j] = matrix[j, jj] * tmp1[i, jj] + @turbo for j in axes(data_out, 2), i in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[i, jj] + end + data_out[i, j] = res end - data_out[i, j] = res - end - return nothing + return nothing end # 2D version, apply matrixJ to dimension J of data_in function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix1::AbstractMatrix, matrix2::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] - @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * data_in[v, ii, j] + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] + @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j] + end + tmp1[v, i, j] = res end - tmp1[v, i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[v, i, j] = matrix2[j, jj] * tmp1[v, i, jj] - @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[v, i, j] = matrix2[j, jj] * tmp1[v, i, jj] + @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj] + end + data_out[v, i, j] = res end - data_out[v, i, j] = res - end - return nothing + return nothing end # 2D version, apply matrixJ to dimension J of data_in and add the result to data_out function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix1::AbstractMatrix, matrix2::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 
2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] - @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * data_in[v, ii, j] + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] + @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j] + end + tmp1[v, i, j] = res end - tmp1[v, i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[v, i, j] += matrix2[j, jj] * tmp1[v, i, jj] - @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[v, i, j] += matrix2[j, jj] * tmp1[v, i, jj] + @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj] + end + data_out[v, i, j] += res end - data_out[v, i, j] += res - end - return nothing + return nothing end # 3D version function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 4}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 2), size(matrix, 2)), - tmp2=zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j, k] = matrix[i, ii] * data_in[v, ii, j, k] - @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[v, ii, j, k] + data_in::AbstractArray{<:Any, 4}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix, 1), size(matrix, 2), + size(matrix, 2)), + tmp2 = zeros(eltype(data_out), size(data_out, 1), + size(matrix, 1), size(matrix, 1), + size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j, k] = matrix[i, ii] * data_in[v, ii, j, k] + @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), + v in axes(tmp1, 1) + + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[v, ii, j, k] + end + tmp1[v, i, j, k] = res end - tmp1[v, i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[v, i, j, k] = matrix[j, jj] * tmp1[v, i, jj, k] - @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), v in axes(tmp2, 1) - res = zero(eltype(tmp2)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[v, i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[v, i, j, k] = matrix[j, jj] * tmp1[v, i, jj, k] + @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), + v in axes(tmp2, 1) + + res = zero(eltype(tmp2)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[v, i, jj, k] + end + tmp2[v, i, j, k] = res end - tmp2[v, i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[v, i, j, k] = matrix[k, kk] * tmp2[v, i, j, kk] - @turbo for k in 
axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix, 2) - res += matrix[k, kk] * tmp2[v, i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[v, i, j, k] = matrix[k, kk] * tmp2[v, i, j, kk] + @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), + v in axes(data_out, 1) + + res = zero(eltype(data_out)) + for kk in axes(matrix, 2) + res += matrix[k, kk] * tmp2[v, i, j, kk] + end + data_out[v, i, j, k] = res end - data_out[v, i, j, k] = res - end - return nothing + return nothing end # 3D version for scalars # Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension # of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`. -function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(matrix, 1), size(matrix, 2), size(matrix, 2)), - tmp2=zeros(eltype(data_out), size(matrix, 1), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[i, j, k] = matrix[i, ii] * data_in[ii, j, k] - @turbo for k in axes(tmp1, 3), j in axes(tmp1, 2), i in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[ii, j, k] +function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 3}, + matrix::AbstractMatrix, + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(matrix, 1), + size(matrix, 2), size(matrix, 2)), + tmp2 = zeros(eltype(data_out), size(matrix, 1), + size(matrix, 1), size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[i, j, k] = matrix[i, ii] * data_in[ii, j, k] + @turbo for k in axes(tmp1, 3), j in axes(tmp1, 2), i in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[ii, j, k] + end + tmp1[i, j, k] = res end - tmp1[i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[i, j, k] = matrix[j, jj] * tmp1[i, jj, k] - @turbo for k in axes(tmp2, 3), j in axes(tmp2, 2), i in axes(tmp2, 1) - res = zero(eltype(tmp2)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[i, j, k] = matrix[j, jj] * tmp1[i, jj, k] + @turbo for k in axes(tmp2, 3), j in axes(tmp2, 2), i in axes(tmp2, 1) + res = zero(eltype(tmp2)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[i, jj, k] + end + tmp2[i, j, k] = res end - tmp2[i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[i, j, k] = matrix[k, kk] * tmp2[i, j, kk] - @turbo for k in axes(data_out, 3), j in axes(data_out, 2), i in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix, 2) - res += matrix[k, kk] * tmp2[i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[i, j, k] = matrix[k, kk] * tmp2[i, j, kk] + @turbo for k in axes(data_out, 3), j in axes(data_out, 2), i in axes(data_out, 1) + res = zero(eltype(data_out)) + for kk in axes(matrix, 2) + res += matrix[k, kk] * tmp2[i, j, kk] + end + data_out[i, j, k] = res end - data_out[i, j, k] = res - end - return nothing + return nothing end # 3D version, apply matrixJ to dimension J of data_in function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, - matrix1::AbstractMatrix, 
matrix2::AbstractMatrix, matrix3::AbstractMatrix, - data_in:: AbstractArray{<:Any, 4}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 2), size(matrix1, 2)), - tmp2=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 1), size(matrix1, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] - @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * data_in[v, ii, j, k] + matrix1::AbstractMatrix, matrix2::AbstractMatrix, + matrix3::AbstractMatrix, + data_in::AbstractArray{<:Any, 4}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2), + size(matrix1, 2)), + tmp2 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 1), + size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] + @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), + v in axes(tmp1, 1) + + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j, k] + end + tmp1[v, i, j, k] = res end - tmp1[v, i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] - @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), v in axes(tmp2, 1) - res = zero(eltype(tmp1)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] + @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), + v in axes(tmp2, 1) + + res = zero(eltype(tmp1)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj, k] + end + tmp2[v, i, j, k] = res end - tmp2[v, i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[v, i, j, k] = matrix3[k, kk] * tmp2[v, i, j, kk] - @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix3, 2) - res += matrix3[k, kk] * tmp2[v, i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[v, i, j, k] = matrix3[k, kk] * tmp2[v, i, j, kk] + @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), + v in axes(data_out, 1) + + res = zero(eltype(data_out)) + for kk in axes(matrix3, 2) + res += matrix3[k, kk] * tmp2[v, i, j, kk] + end + data_out[v, i, j, k] = res end - data_out[v, i, j, k] = res - end - return nothing + return nothing end # 3D version, apply matrixJ to dimension J of data_in and add the result to data_out function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, - matrix1::AbstractMatrix, matrix2::AbstractMatrix, matrix3::AbstractMatrix, - data_in:: AbstractArray{<:Any, 4}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 2), size(matrix1, 2)), - tmp2=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 1), size(matrix1, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] - @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * 
data_in[v, ii, j, k] + matrix1::AbstractMatrix, matrix2::AbstractMatrix, + matrix3::AbstractMatrix, + data_in::AbstractArray{<:Any, 4}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2), + size(matrix1, 2)), + tmp2 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 1), + size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] + @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), + v in axes(tmp1, 1) + + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j, k] + end + tmp1[v, i, j, k] = res end - tmp1[v, i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] - @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), v in axes(tmp2, 1) - res = zero(eltype(tmp1)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] + @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), + v in axes(tmp2, 1) + + res = zero(eltype(tmp1)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj, k] + end + tmp2[v, i, j, k] = res end - tmp2[v, i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[v, i, j, k] += matrix3[k, kk] * tmp2[v, i, j, kk] - @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix3, 2) - res += matrix3[k, kk] * tmp2[v, i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[v, i, j, k] += matrix3[k, kk] * tmp2[v, i, j, kk] + @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), + v in axes(data_out, 1) + + res = zero(eltype(data_out)) + for kk in axes(matrix3, 2) + res += matrix3[k, kk] * tmp2[v, i, j, kk] + end + data_out[v, i, j, k] += res end - data_out[v, i, j, k] += res - end - return nothing + return nothing end diff --git a/src/solvers/dgsem/l2projection.jl b/src/solvers/dgsem/l2projection.jl index 44092b2f720..0bb46f5ca15 100644 --- a/src/solvers/dgsem/l2projection.jl +++ b/src/solvers/dgsem/l2projection.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This diagram shows what is meant by "lower", "upper", and "large": # +1 +1 @@ -20,141 +20,135 @@ # # That is, we are only concerned with 2:1 subdivision of a surface/element. - # Calculate forward projection matrix for discrete L2 projection from large to upper # # Note: This is actually an interpolation. 
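The forward operators that follow evaluate nothing more than the parent element's Lagrange basis at the mapped child nodes ξ ↦ (ξ ± 1)/2, so the construction can be sketched without any Trixi internals. A minimal sketch, assuming Chebyshev-Gauss-Lobatto nodes as a stand-in for the Legendre-Gauss-Lobatto nodes returned by `gauss_lobatto_nodes_weights`; the helper `lagrange` and the name `forward_upper` are illustrative, not part of the code below:

```julia
# Build the "large to upper" operator by evaluating each Lagrange basis
# polynomial of the parent at the mapped nodes (ξ + 1)/2.
n = 4
nodes = [cospi(k / (n - 1)) for k in (n - 1):-1:0]  # CGL stand-in nodes on [-1, 1]

# Lagrange basis polynomial ℓ_j evaluated at x (direct product formula)
lagrange(x, j, nodes) = prod((x - nodes[i]) / (nodes[j] - nodes[i])
                             for i in eachindex(nodes) if i != j)

forward_upper = [lagrange((nodes[j] + 1) / 2, i, nodes)
                 for j in eachindex(nodes), i in eachindex(nodes)]

# Interpolation is exact for polynomials up to degree n - 1, which is why the
# forward "projection" from large to upper/lower incurs no error.
p(x) = 1 + 2x + 3x^2
@assert forward_upper * p.(nodes) ≈ p.((nodes .+ 1) ./ 2)
```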
function calc_forward_upper(n_nodes) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: interpolation) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] + 1), nodes, wbary) - for i in 1:n_nodes - operator[j, i] = poly[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: interpolation) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] + 1), nodes, wbary) + for i in 1:n_nodes + operator[j, i] = poly[i] + end end - end - return operator + return operator end - # Calculate forward projection matrix for discrete L2 projection from large to lower # # Note: This is actually an interpolation. function calc_forward_lower(n_nodes) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: interpolation) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] - 1), nodes, wbary) - for i in 1:n_nodes - operator[j, i] = poly[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: interpolation) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] - 1), nodes, wbary) + for i in 1:n_nodes + operator[j, i] = poly[i] + end end - end - return operator + return operator end - # Calculate reverse projection matrix for discrete L2 projection from upper to large (Gauss version) # # Note: To make the L2 projection exact, first convert to Gauss nodes, # perform projection, and convert back to Gauss-Lobatto.
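Entrywise, the reverse operator built below is `operator[i, j] = 1/2 * ℓ_i(x̂_j) * w_j / w_i`, i.e. the discrete L2 adjoint of the forward interpolation, with the factor 1/2 accounting for the child element's Jacobian. In matrix form that is R = 1/2 · W⁻¹ Pᵀ W with W = Diagonal(weights), which the following sketch checks with placeholder data (`P`, `R`, and `w` are illustrative names, not Trixi API):

```julia
using LinearAlgebra

n = 4
P = rand(n, n)      # stands for the forward evaluations ℓ_i(x̂_j); rows j, columns i
w = rand(n) .+ 0.5  # placeholder positive quadrature weights

# Entrywise definition used by calc_reverse_upper/lower ...
R = [0.5 * P[j, i] * w[j] / w[i] for i in 1:n, j in 1:n]

# ... equals the weighted transpose (discrete L2 adjoint) of P
@assert R ≈ 0.5 * inv(Diagonal(w)) * transpose(P) * Diagonal(w)
```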
function calc_reverse_upper(n_nodes, ::Val{:gauss}) - # Calculate nodes, weights, and barycentric weights for Legendre-Gauss - gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) - gauss_wbary = barycentric_weights(gauss_nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (gauss_nodes[j] + 1), gauss_nodes, gauss_wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * gauss_weights[j]/gauss_weights[i] + # Calculate nodes, weights, and barycentric weights for Legendre-Gauss + gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) + gauss_wbary = barycentric_weights(gauss_nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (gauss_nodes[j] + 1), + gauss_nodes, gauss_wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * gauss_weights[j] / gauss_weights[i] + end end - end - # Calculate Vandermondes - lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) - gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) - lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) + # Calculate Vandermondes + lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) + gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) + lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) - return gauss2lobatto * operator * lobatto2gauss + return gauss2lobatto * operator * lobatto2gauss end - # Calculate reverse projection matrix for discrete L2 projection from lower to large (Gauss version) # # Note: To make the L2 projection exact, first convert to Gauss nodes, # perform projection, and convert back to Gauss-Lobatto.
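The Gauss detour pays off because n-point Gauss quadrature integrates polynomials up to degree 2n - 1 exactly, so projecting the two children back onto the parent recovers a parent polynomial without error: R_upper·F_upper + R_lower·F_lower = I on the Gauss nodes (the Vandermonde conversions above then transfer the operators to the solver's Gauss-Lobatto nodes). A self-contained check, using a Golub-Welsch routine `gauss_legendre` as an illustrative stand-in for `gauss_nodes_weights`:

```julia
using LinearAlgebra

# Golub-Welsch: Gauss-Legendre nodes/weights via the Jacobi-matrix eigenproblem
function gauss_legendre(n)
    β = [k / sqrt(4 * k^2 - 1) for k in 1:(n - 1)]
    λ, V = eigen(SymTridiagonal(zeros(n), β))
    return λ, 2 .* V[1, :] .^ 2
end

lagrange(x, j, nodes) = prod((x - nodes[i]) / (nodes[j] - nodes[i])
                             for i in eachindex(nodes) if i != j)

n = 4
x, w = gauss_legendre(n)
# Forward interpolation and reverse projection for the child at ξ = (η + shift)/2
F(shift) = [lagrange((x[j] + shift) / 2, i, x) for j in 1:n, i in 1:n]
R(shift) = [0.5 * lagrange((x[j] + shift) / 2, i, x) * w[j] / w[i]
            for i in 1:n, j in 1:n]

# Projecting both children back onto the parent is the identity
@assert R(+1) * F(+1) + R(-1) * F(-1) ≈ I(n)
```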
function calc_reverse_lower(n_nodes, ::Val{:gauss}) - # Calculate nodes, weights, and barycentric weights for Legendre-Gauss - gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) - gauss_wbary = barycentric_weights(gauss_nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (gauss_nodes[j] - 1), gauss_nodes, gauss_wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * gauss_weights[j]/gauss_weights[i] + # Calculate nodes, weights, and barycentric weights for Legendre-Gauss + gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) + gauss_wbary = barycentric_weights(gauss_nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (gauss_nodes[j] - 1), + gauss_nodes, gauss_wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * gauss_weights[j] / gauss_weights[i] + end end - end - # Calculate Vandermondes - lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) - gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) - lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) + # Calculate Vandermondes + lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) + gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) + lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) - return gauss2lobatto * operator * lobatto2gauss + return gauss2lobatto * operator * lobatto2gauss end - # Calculate reverse projection matrix for discrete L2 projection from upper to large (Gauss-Lobatto # version) function calc_reverse_upper(n_nodes, ::Val{:gauss_lobatto}) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] + 1), nodes, wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * weights[j]/weights[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] + 1), nodes, wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * weights[j] / weights[i] + end end - end - return operator + return operator end - # Calculate reverse projection matrix for discrete L2 projection from lower to large (Gauss-Lobatto # version) function calc_reverse_lower(n_nodes, ::Val{:gauss_lobatto}) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] - 1), nodes, wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * weights[j]/weights[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = 
gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] - 1), nodes, wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * weights[j] / weights[i] + end end - end - return operator + return operator end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index ba582b0d47e..9b87de777a6 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -3,32 +3,41 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - -mutable struct P4estElementContainer{NDIMS, RealT<:Real, uEltype<:Real, NDIMSP1, NDIMSP2, NDIMSP3} <: AbstractContainer - # Physical coordinates at each node - node_coordinates ::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] - # Jacobian matrix of the transformation - # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix ::Array{RealT, NDIMSP3} - # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors ::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] - # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian ::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] - # Buffer for calculated surface flux - surface_flux_values ::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] - - # internal `resize!`able storage - _node_coordinates ::Vector{RealT} - _jacobian_matrix ::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian ::Vector{RealT} - _surface_flux_values ::Vector{uEltype} +#! format: noindent + +mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, + NDIMSP2, NDIMSP3} <: AbstractContainer + # Physical coordinates at each node + node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + # Jacobian matrix of the transformation + # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... 
+ jacobian_matrix::Array{RealT, NDIMSP3} + # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) + contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) + inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + # Buffer for calculated surface flux + surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _jacobian_matrix::Vector{RealT} + _contravariant_vectors::Vector{RealT} + _inverse_jacobian::Vector{RealT} + _surface_flux_values::Vector{uEltype} end -@inline nelements(elements::P4estElementContainer) = size(elements.node_coordinates, ndims(elements) + 2) -@inline Base.ndims(::P4estElementContainer{NDIMS}) where NDIMS = NDIMS -@inline Base.eltype(::P4estElementContainer{NDIMS, RealT, uEltype}) where {NDIMS, RealT, uEltype} = uEltype +@inline function nelements(elements::P4estElementContainer) + size(elements.node_coordinates, ndims(elements) + 2) +end +@inline Base.ndims(::P4estElementContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estElementContainer{NDIMS, RealT, uEltype}) where { + NDIMS, + RealT, + uEltype + } + uEltype +end # Only one-dimensional `Array`s are `resize!`able in Julia. # Hence, we use `Vector`s as internal storage and `resize!` @@ -36,247 +45,268 @@ end # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::P4estElementContainer, capacity) - @unpack _node_coordinates, _jacobian_matrix, _contravariant_vectors, + @unpack _node_coordinates, _jacobian_matrix, _contravariant_vectors, _inverse_jacobian, _surface_flux_values = elements - n_dims = ndims(elements) - n_nodes = size(elements.node_coordinates, 2) - n_variables = size(elements.surface_flux_values, 1) + n_dims = ndims(elements) + n_nodes = size(elements.node_coordinates, 2) + n_variables = size(elements.surface_flux_values, 1) - resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) + resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (n_dims, ntuple(_ -> n_nodes, n_dims)..., + capacity)) - resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), - (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) + resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) + elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + (n_dims, n_dims, + ntuple(_ -> n_nodes, n_dims)..., capacity)) - resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), - size(elements.jacobian_matrix)) + resize!(_contravariant_vectors, length(_jacobian_matrix)) + elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + size(elements.jacobian_matrix)) - resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), - (ntuple(_ -> n_nodes, n_dims)..., capacity)) + resize!(_inverse_jacobian, n_nodes^n_dims * capacity) + elements.inverse_jacobian = unsafe_wrap(Array, 
pointer(_inverse_jacobian), + (ntuple(_ -> n_nodes, n_dims)..., capacity)) - resize!(_surface_flux_values, - n_variables * n_nodes^(n_dims-1) * (n_dims*2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, ntuple(_ -> n_nodes, n_dims-1)..., n_dims*2, capacity)) + resize!(_surface_flux_values, + n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, + ntuple(_ -> n_nodes, n_dims - 1)..., + n_dims * 2, capacity)) - return nothing + return nothing end - # Create element container and initialize element data function init_elements(mesh::P4estMesh{NDIMS, RealT}, equations, - basis, ::Type{uEltype}) where {NDIMS, RealT<:Real, uEltype<:Real} - nelements = ncells(mesh) - - _node_coordinates = Vector{RealT}(undef, NDIMS * nnodes(basis)^NDIMS * nelements) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - - _jacobian_matrix = Vector{RealT}(undef, NDIMS^2 * nnodes(basis)^NDIMS * nelements) - jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), - (NDIMS, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - - _contravariant_vectors = similar(_jacobian_matrix) - contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), - size(jacobian_matrix)) - - _inverse_jacobian = Vector{RealT}(undef, nnodes(basis)^NDIMS * nelements) - inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), - (ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - - _surface_flux_values = Vector{uEltype}(undef, - nvariables(equations) * nnodes(basis)^(NDIMS-1) * (NDIMS*2) * nelements) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS-1)..., NDIMS*2, nelements)) - - elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS+1, NDIMS+2, NDIMS+3}( - node_coordinates, jacobian_matrix, contravariant_vectors, - inverse_jacobian, surface_flux_values, - _node_coordinates, _jacobian_matrix, _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) - - init_elements!(elements, mesh, basis) - return elements + basis, + ::Type{uEltype}) where {NDIMS, RealT <: Real, uEltype <: Real} + nelements = ncells(mesh) + + _node_coordinates = Vector{RealT}(undef, NDIMS * nnodes(basis)^NDIMS * nelements) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., + nelements)) + + _jacobian_matrix = Vector{RealT}(undef, NDIMS^2 * nnodes(basis)^NDIMS * nelements) + jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + (NDIMS, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., + nelements)) + + _contravariant_vectors = similar(_jacobian_matrix) + contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + size(jacobian_matrix)) + + _inverse_jacobian = Vector{RealT}(undef, nnodes(basis)^NDIMS * nelements) + inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + (ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) + + _surface_flux_values = Vector{uEltype}(undef, + nvariables(equations) * + nnodes(basis)^(NDIMS - 1) * (NDIMS * 2) * + nelements) + surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (nvariables(equations), + ntuple(_ -> nnodes(basis), NDIMS - 1)..., + NDIMS * 2, nelements)) + + elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, + 
NDIMS + 3}(node_coordinates, jacobian_matrix, + contravariant_vectors, + inverse_jacobian, surface_flux_values, + _node_coordinates, _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, _surface_flux_values) + + init_elements!(elements, mesh, basis) + return elements end +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: + AbstractContainer + u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] + neighbor_ids::Matrix{Int} # [primary/secondary, interface] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] -mutable struct P4estInterfaceContainer{NDIMS, uEltype<:Real, NDIMSP2} <: AbstractContainer - u ::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids ::Matrix{Int} # [primary/secondary, interface] - node_indices ::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] - - # internal `resize!`able storage - _u ::Vector{uEltype} - _neighbor_ids ::Vector{Int} - _node_indices ::Vector{NTuple{NDIMS, Symbol}} + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} + _node_indices::Vector{NTuple{NDIMS, Symbol}} end -@inline ninterfaces(interfaces::P4estInterfaceContainer) = size(interfaces.neighbor_ids, 2) -@inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where NDIMS = NDIMS +@inline function ninterfaces(interfaces::P4estInterfaceContainer) + size(interfaces.neighbor_ids, 2) +end +@inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS # See explanation of Base.resize! for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) - @unpack _u, _neighbor_ids, _node_indices = interfaces + @unpack _u, _neighbor_ids, _node_indices = interfaces - n_dims = ndims(interfaces) - n_nodes = size(interfaces.u, 3) - n_variables = size(interfaces.u, 2) + n_dims = ndims(interfaces) + n_nodes = size(interfaces.u, 3) + n_variables = size(interfaces.u, 2) - resize!(_u, 2 * n_variables * n_nodes^(n_dims-1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., + capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) - resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + resize!(_node_indices, 2 * capacity) + interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) - return nothing + return nothing end - # Create interface container and initialize interface data. 
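Before `init_interfaces` below, it is worth spelling out the `resize!` pattern that this and all following containers share: each multi-dimensional field is an `unsafe_wrap`ped view of a flat backing `Vector`, and the view must be re-wrapped after every `resize!` because the buffer may have moved. A minimal sketch with a hypothetical `MiniContainer` (not Trixi API); note that the backing vector must stay referenced, here via the struct field, since `unsafe_wrap` does not protect it from garbage collection:

```julia
mutable struct MiniContainer
    u::Array{Float64, 2}  # [variable, element], wraps _u
    _u::Vector{Float64}   # flat, resize!-able backing storage
end

function Base.resize!(c::MiniContainer, capacity)
    n_variables = size(c.u, 1)
    resize!(c._u, n_variables * capacity)
    # Re-wrap: the pointer may have changed after resize!
    c.u = unsafe_wrap(Array, pointer(c._u), (n_variables, capacity))
    return nothing
end

backing = zeros(2 * 3)
c = MiniContainer(unsafe_wrap(Array, pointer(backing), (2, 3)), backing)
resize!(c, 5)
@assert size(c.u) == (2, 5)
```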
function init_interfaces(mesh::P4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) + NDIMS = ndims(elements) + uEltype = eltype(elements) - # Initialize container - n_interfaces = count_required_surfaces(mesh).interfaces + # Initialize container + n_interfaces = count_required_surfaces(mesh).interfaces - _u = Vector{uEltype}(undef, 2 * nvariables(equations) * nnodes(basis)^(NDIMS-1) * n_interfaces) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_interfaces)) + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * nnodes(basis)^(NDIMS - 1) * + n_interfaces) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., + n_interfaces)) - _neighbor_ids = Vector{Int}(undef, 2 * n_interfaces) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, n_interfaces)) + _neighbor_ids = Vector{Int}(undef, 2 * n_interfaces) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, n_interfaces)) - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) - node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) + _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) + node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS+2}(u, neighbor_ids, node_indices, - _u, _neighbor_ids, _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, + node_indices, + _u, _neighbor_ids, + _node_indices) - init_interfaces!(interfaces, mesh) + init_interfaces!(interfaces, mesh) - return interfaces + return interfaces end - function init_interfaces!(interfaces, mesh::P4estMesh) - init_surfaces!(interfaces, nothing, nothing, mesh) + init_surfaces!(interfaces, nothing, nothing, mesh) - return interfaces + return interfaces end +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: + AbstractContainer + u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] + neighbor_ids::Vector{Int} # [boundary] + node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + name::Vector{Symbol} # [boundary] -mutable struct P4estBoundaryContainer{NDIMS, uEltype<:Real, NDIMSP1} <: AbstractContainer - u ::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] - name ::Vector{Symbol} # [boundary] - - # internal `resize!`able storage - _u ::Vector{uEltype} + # internal `resize!`able storage + _u::Vector{uEltype} end -@inline nboundaries(boundaries::P4estBoundaryContainer) = length(boundaries.neighbor_ids) -@inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where NDIMS = NDIMS +@inline function nboundaries(boundaries::P4estBoundaryContainer) + length(boundaries.neighbor_ids) +end +@inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) - @unpack _u, neighbor_ids, node_indices, name = boundaries + @unpack _u, neighbor_ids, node_indices, name = boundaries - n_dims = ndims(boundaries) - n_nodes = size(boundaries.u, 2) - n_variables = size(boundaries.u, 1) + n_dims = ndims(boundaries) + n_nodes = size(boundaries.u, 2) + n_variables = size(boundaries.u, 1) - resize!(_u, n_variables * n_nodes^(n_dims-1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (n_variables, ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., + capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(node_indices, capacity) + resize!(node_indices, capacity) - resize!(name, capacity) + resize!(name, capacity) - return nothing + return nothing end - # Create interface container and initialize interface data in `elements`. function init_boundaries(mesh::P4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) + NDIMS = ndims(elements) + uEltype = eltype(elements) - # Initialize container - n_boundaries = count_required_surfaces(mesh).boundaries + # Initialize container + n_boundaries = count_required_surfaces(mesh).boundaries - _u = Vector{uEltype}(undef, nvariables(equations) * nnodes(basis)^(NDIMS-1) * n_boundaries) - u = unsafe_wrap(Array, pointer(_u), - (nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_boundaries)) + _u = Vector{uEltype}(undef, + nvariables(equations) * nnodes(basis)^(NDIMS - 1) * + n_boundaries) + u = unsafe_wrap(Array, pointer(_u), + (nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., + n_boundaries)) - neighbor_ids = Vector{Int}(undef, n_boundaries) - node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) - names = Vector{Symbol}(undef, n_boundaries) + neighbor_ids = Vector{Int}(undef, n_boundaries) + node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) + names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS+1}(u, neighbor_ids, - node_indices, names, _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, + node_indices, names, + _u) - if n_boundaries > 0 - init_boundaries!(boundaries, mesh) - end + if n_boundaries > 0 + init_boundaries!(boundaries, mesh) + end - return boundaries + return boundaries end - function init_boundaries!(boundaries, mesh::P4estMesh) - init_surfaces!(nothing, nothing, boundaries, mesh) + init_surfaces!(nothing, nothing, boundaries, mesh) - return boundaries + return boundaries end - # Function barrier for type stability function init_boundaries_iter_face_inner(info, boundaries, boundary_id, mesh) - # Extract boundary data - side = unsafe_load_side(info) - # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + # Extract boundary data + side = unsafe_load_side(info) + # Get local tree, one-based indexing + tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset - # Verify before accessing is.full, but this should never happen - @assert side.is_hanging == false + # Verify before accessing is.full, but this should never happen + @assert side.is_hanging == 
false - local_quad_id = side.is.full.quadid - # Global ID of this quad - quad_id = offset + local_quad_id + local_quad_id = side.is.full.quadid + # Global ID of this quad + quad_id = offset + local_quad_id - # Write data to boundaries container - # `p4est` uses zero-based indexing; convert to one-based indexing - boundaries.neighbor_ids[boundary_id] = quad_id + 1 + # Write data to boundaries container + # `p4est` uses zero-based indexing; convert to one-based indexing + boundaries.neighbor_ids[boundary_id] = quad_id + 1 - # Face at which the boundary lies - face = side.face + # Face at which the boundary lies + face = side.face - # Save boundaries.node_indices dimension specific in containers_[23]d.jl - init_boundary_node_indices!(boundaries, face, boundary_id) + # Save boundaries.node_indices dimension specific in containers_[23]d.jl + init_boundary_node_indices!(boundaries, face, boundary_id) - # One-based indexing - boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] + # One-based indexing + boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] - return nothing + return nothing end - # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in `neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -302,380 +332,391 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype<:Real, NDIMSP1, NDIMSP3} <: AbstractContainer - u ::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids ::Matrix{Int} # [position, mortar] - node_indices ::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - - # internal `resize!`able storage - _u ::Vector{uEltype} - _neighbor_ids ::Vector{Int} - _node_indices ::Vector{NTuple{NDIMS, Symbol}} +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: + AbstractContainer + u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] + neighbor_ids::Matrix{Int} # [position, mortar] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} + _node_indices::Vector{NTuple{NDIMS, Symbol}} end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) -@inline Base.ndims(::P4estMortarContainer{NDIMS}) where NDIMS = NDIMS +@inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS # See explanation of Base.resize! 
for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) - @unpack _u, _neighbor_ids, _node_indices = mortars + @unpack _u, _neighbor_ids, _node_indices = mortars - n_dims = ndims(mortars) - n_nodes = size(mortars.u, 4) - n_variables = size(mortars.u, 2) + n_dims = ndims(mortars) + n_nodes = size(mortars.u, 4) + n_variables = size(mortars.u, 2) - resize!(_u, 2 * n_variables * 2^(n_dims-1) * n_nodes^(n_dims-1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, 2^(n_dims-1), ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) + mortars.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, 2^(n_dims - 1), + ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) - resize!(_neighbor_ids, (2^(n_dims-1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2^(n_dims-1) + 1, capacity)) + resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) + mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2^(n_dims - 1) + 1, capacity)) - resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + resize!(_node_indices, 2 * capacity) + mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) - return nothing + return nothing end - # Create mortar container and initialize mortar data. function init_mortars(mesh::P4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) - - # Initialize container - n_mortars = count_required_surfaces(mesh).mortars - - _u = Vector{uEltype}(undef, - 2 * nvariables(equations) * 2^(NDIMS-1) * nnodes(basis)^(NDIMS-1) * n_mortars) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), 2^(NDIMS-1), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_mortars)) - - _neighbor_ids = Vector{Int}(undef, (2^(NDIMS-1) + 1) * n_mortars) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2^(NDIMS-1) + 1, n_mortars)) - - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) - node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS+1, NDIMS+3}(u, neighbor_ids, node_indices, - _u, _neighbor_ids, _node_indices) - - if n_mortars > 0 - init_mortars!(mortars, mesh) - end + NDIMS = ndims(elements) + uEltype = eltype(elements) + + # Initialize container + n_mortars = count_required_surfaces(mesh).mortars + + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * 2^(NDIMS - 1) * + nnodes(basis)^(NDIMS - 1) * n_mortars) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), 2^(NDIMS - 1), + ntuple(_ -> nnodes(basis), NDIMS - 1)..., n_mortars)) + + _neighbor_ids = Vector{Int}(undef, (2^(NDIMS - 1) + 1) * n_mortars) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2^(NDIMS - 1) + 1, n_mortars)) + + _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) + node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) + + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) + + if n_mortars > 0 + init_mortars!(mortars, mesh) + end - return mortars + return mortars end - function init_mortars!(mortars, mesh::P4estMesh) - init_surfaces!(nothing, mortars, nothing, mesh) + init_surfaces!(nothing, mortars, nothing, mesh) - return mortars + return mortars end - function 
reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) - # Re-initialize elements container - @unpack elements = cache - resize!(elements, ncells(mesh)) - init_elements!(elements, mesh, dg.basis) + # Re-initialize elements container + @unpack elements = cache + resize!(elements, ncells(mesh)) + init_elements!(elements, mesh, dg.basis) - required = count_required_surfaces(mesh) + required = count_required_surfaces(mesh) - # resize interfaces container - @unpack interfaces = cache - resize!(interfaces, required.interfaces) + # resize interfaces container + @unpack interfaces = cache + resize!(interfaces, required.interfaces) - # resize boundaries container - @unpack boundaries = cache - resize!(boundaries, required.boundaries) + # resize boundaries container + @unpack boundaries = cache + resize!(boundaries, required.boundaries) - # resize mortars container - @unpack mortars = cache - resize!(mortars, required.mortars) + # resize mortars container + @unpack mortars = cache + resize!(mortars, required.mortars) - # re-initialize containers together to reduce - # the number of iterations over the mesh in `p4est` - init_surfaces!(interfaces, mortars, boundaries, mesh) + # re-initialize containers together to reduce + # the number of iterations over the mesh in `p4est` + init_surfaces!(interfaces, mortars, boundaries, mesh) end - # A helper struct used in initialization methods below mutable struct InitSurfacesIterFaceUserData{Interfaces, Mortars, Boundaries, Mesh} - interfaces ::Interfaces - interface_id::Int - mortars ::Mortars - mortar_id ::Int - boundaries ::Boundaries - boundary_id ::Int - mesh ::Mesh + interfaces::Interfaces + interface_id::Int + mortars::Mortars + mortar_id::Int + boundaries::Boundaries + boundary_id::Int + mesh::Mesh end function InitSurfacesIterFaceUserData(interfaces, mortars, boundaries, mesh) - return InitSurfacesIterFaceUserData{ - typeof(interfaces), typeof(mortars), typeof(boundaries), typeof(mesh)}( - interfaces, 1, mortars, 1, boundaries, 1, mesh) + return InitSurfacesIterFaceUserData{ + typeof(interfaces), typeof(mortars), + typeof(boundaries), typeof(mesh)}(interfaces, 1, + mortars, 1, + boundaries, 1, + mesh) end function init_surfaces_iter_face(info, user_data) - # Unpack user_data - data = unsafe_pointer_to_objref(Ptr{InitSurfacesIterFaceUserData}(user_data)) + # Unpack user_data + data = unsafe_pointer_to_objref(Ptr{InitSurfacesIterFaceUserData}(user_data)) - # Function barrier because the unpacked user_data above is type-unstable - init_surfaces_iter_face_inner(info, data) + # Function barrier because the unpacked user_data above is type-unstable + init_surfaces_iter_face_inner(info, data) end # 2D -cfunction(::typeof(init_surfaces_iter_face), ::Val{2}) = @cfunction(init_surfaces_iter_face, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face), ::Val{2}) + @cfunction(init_surfaces_iter_face, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(init_surfaces_iter_face), ::Val{3}) = @cfunction(init_surfaces_iter_face, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face), ::Val{3}) + @cfunction(init_surfaces_iter_face, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end # Function barrier for type stability function init_surfaces_iter_face_inner(info, user_data) - @unpack interfaces, mortars, boundaries = user_data - elem_count = unsafe_load(info).sides.elem_count - - if elem_count == 2 - # Two neighboring 
elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface - if interfaces !== nothing - init_interfaces_iter_face_inner(info, sides, user_data) - end - else - # Hanging nodes => mortar - if mortars !== nothing - init_mortars_iter_face_inner(info, sides, user_data) - end - end - elseif elem_count == 1 - # One neighboring elements => boundary - if boundaries !== nothing - init_boundaries_iter_face_inner(info, user_data) + @unpack interfaces, mortars, boundaries = user_data + elem_count = unsafe_load(info).sides.elem_count + + if elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface + if interfaces !== nothing + init_interfaces_iter_face_inner(info, sides, user_data) + end + else + # Hanging nodes => mortar + if mortars !== nothing + init_mortars_iter_face_inner(info, sides, user_data) + end + end + elseif elem_count == 1 + # One neighboring elements => boundary + if boundaries !== nothing + init_boundaries_iter_face_inner(info, user_data) + end end - end - return nothing + return nothing end function init_surfaces!(interfaces, mortars, boundaries, mesh::P4estMesh) - # Let `p4est` iterate over all interfaces and call init_surfaces_iter_face - iter_face_c = cfunction(init_surfaces_iter_face, Val(ndims(mesh))) - user_data = InitSurfacesIterFaceUserData( - interfaces, mortars, boundaries, mesh) + # Let `p4est` iterate over all interfaces and call init_surfaces_iter_face + iter_face_c = cfunction(init_surfaces_iter_face, Val(ndims(mesh))) + user_data = InitSurfacesIterFaceUserData(interfaces, mortars, boundaries, mesh) - iterate_p4est(mesh.p4est, user_data; iter_face_c=iter_face_c) + iterate_p4est(mesh.p4est, user_data; iter_face_c = iter_face_c) - return interfaces + return interfaces end - # Initialization of interfaces after the function barrier function init_interfaces_iter_face_inner(info, sides, user_data) - @unpack interfaces, interface_id, mesh = user_data - user_data.interface_id += 1 - - # Get Tuple of local trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - # Quadrant numbering offsets of the quadrants at this interface - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) - - local_quad_ids = SVector(sides[1].is.full.quadid, sides[2].is.full.quadid) - # Global IDs of the neighboring quads - quad_ids = offsets + local_quad_ids - - # Write data to interfaces container - # `p4est` uses zero-based indexing; convert to one-based indexing - interfaces.neighbor_ids[1, interface_id] = quad_ids[1] + 1 - interfaces.neighbor_ids[2, interface_id] = quad_ids[2] + 1 - - # Face at which the interface lies - faces = (sides[1].face, sides[2].face) + @unpack interfaces, interface_id, mesh = user_data + user_data.interface_id += 1 + + # Get Tuple of local trees, one-based indexing + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + # Quadrant numbering offsets of the quadrants at this interface + offsets = SVector(trees[1].quadrants_offset, + trees[2].quadrants_offset) + + local_quad_ids = SVector(sides[1].is.full.quadid, 
sides[2].is.full.quadid) + # Global IDs of the neighboring quads + quad_ids = offsets + local_quad_ids + + # Write data to interfaces container + # `p4est` uses zero-based indexing; convert to one-based indexing + interfaces.neighbor_ids[1, interface_id] = quad_ids[1] + 1 + interfaces.neighbor_ids[2, interface_id] = quad_ids[2] + 1 + + # Face at which the interface lies + faces = (sides[1].face, sides[2].face) - # Save interfaces.node_indices dimension specific in containers_[23]d.jl - init_interface_node_indices!(interfaces, faces, - unsafe_load(info).orientation, interface_id) + # Save interfaces.node_indices dimension specific in containers_[23]d.jl + init_interface_node_indices!(interfaces, faces, + unsafe_load(info).orientation, interface_id) - return nothing + return nothing end - # Initialization of boundaries after the function barrier function init_boundaries_iter_face_inner(info, user_data) - @unpack boundaries, boundary_id, mesh = user_data - user_data.boundary_id += 1 + @unpack boundaries, boundary_id, mesh = user_data + user_data.boundary_id += 1 - # Extract boundary data - side = unsafe_load_side(info) - # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + # Extract boundary data + side = unsafe_load_side(info) + # Get local tree, one-based indexing + tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset - # Verify before accessing is.full, but this should never happen - @assert side.is_hanging == false + # Verify before accessing is.full, but this should never happen + @assert side.is_hanging == false - local_quad_id = side.is.full.quadid - # Global ID of this quad - quad_id = offset + local_quad_id + local_quad_id = side.is.full.quadid + # Global ID of this quad + quad_id = offset + local_quad_id - # Write data to boundaries container - # `p4est` uses zero-based indexing; convert to one-based indexing - boundaries.neighbor_ids[boundary_id] = quad_id + 1 + # Write data to boundaries container + # `p4est` uses zero-based indexing; convert to one-based indexing + boundaries.neighbor_ids[boundary_id] = quad_id + 1 - # Face at which the boundary lies - face = side.face + # Face at which the boundary lies + face = side.face - # Save boundaries.node_indices dimension specific in containers_[23]d.jl - init_boundary_node_indices!(boundaries, face, boundary_id) + # Save boundaries.node_indices dimension specific in containers_[23]d.jl + init_boundary_node_indices!(boundaries, face, boundary_id) - # One-based indexing - boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] + # One-based indexing + boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] - return nothing + return nothing end - # Initialization of mortars after the function barrier function init_mortars_iter_face_inner(info, sides, user_data) - @unpack mortars, mortar_id, mesh = user_data - user_data.mortar_id += 1 - - # Get Tuple of local trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - # Quadrant numbering offsets of the quadrants at this interface - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) - - if sides[1].is_hanging == true - # Left is small, right is large - faces = (sides[1].face, sides[2].face) - - local_small_quad_ids = sides[1].is.hanging.quadid - 
# Global IDs of the two small quads - small_quad_ids = offsets[1] .+ local_small_quad_ids - - # Just be sure before accessing is.full - @assert sides[2].is_hanging == false - large_quad_id = offsets[2] + sides[2].is.full.quadid - else # sides[2].is_hanging == true - # Right is small, left is large. - # init_mortar_node_indices! below expects side 1 to contain the small elements. - faces = (sides[2].face, sides[1].face) - - local_small_quad_ids = sides[2].is.hanging.quadid - # Global IDs of the two small quads - small_quad_ids = offsets[2] .+ local_small_quad_ids - - # Just be sure before accessing is.full - @assert sides[1].is_hanging == false - large_quad_id = offsets[1] + sides[1].is.full.quadid - end + @unpack mortars, mortar_id, mesh = user_data + user_data.mortar_id += 1 + + # Get Tuple of local trees, one-based indexing + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + # Quadrant numbering offsets of the quadrants at this interface + offsets = SVector(trees[1].quadrants_offset, + trees[2].quadrants_offset) + + if sides[1].is_hanging == true + # Left is small, right is large + faces = (sides[1].face, sides[2].face) + + local_small_quad_ids = sides[1].is.hanging.quadid + # Global IDs of the two small quads + small_quad_ids = offsets[1] .+ local_small_quad_ids + + # Just be sure before accessing is.full + @assert sides[2].is_hanging == false + large_quad_id = offsets[2] + sides[2].is.full.quadid + else # sides[2].is_hanging == true + # Right is small, left is large. + # init_mortar_node_indices! below expects side 1 to contain the small elements. + faces = (sides[2].face, sides[1].face) + + local_small_quad_ids = sides[2].is.hanging.quadid + # Global IDs of the two small quads + small_quad_ids = offsets[2] .+ local_small_quad_ids + + # Just be sure before accessing is.full + @assert sides[1].is_hanging == false + large_quad_id = offsets[1] + sides[1].is.full.quadid + end - # Write data to mortar container, 1 and 2 are the small elements - # `p4est` uses zero-based indexing; convert to one-based indexing - mortars.neighbor_ids[1:end-1, mortar_id] .= small_quad_ids[:] .+ 1 - # Last entry is the large element - mortars.neighbor_ids[end, mortar_id] = large_quad_id + 1 + # Write data to mortar container, 1 and 2 are the small elements + # `p4est` uses zero-based indexing; convert to one-based indexing + mortars.neighbor_ids[1:(end - 1), mortar_id] .= small_quad_ids[:] .+ 1 + # Last entry is the large element + mortars.neighbor_ids[end, mortar_id] = large_quad_id + 1 - init_mortar_node_indices!(mortars, faces, unsafe_load(info).orientation, mortar_id) + init_mortar_node_indices!(mortars, faces, unsafe_load(info).orientation, mortar_id) - return nothing + return nothing end - # Iterate over all interfaces and count # - (inner) interfaces # - mortars # - boundaries # and collect the numbers in `user_data` in this order. 
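The counting callback below receives `user_data` from `p4est` as a raw pointer and reads/writes the three counters with `unsafe_load`/`unsafe_store!` at 1-based slots. A minimal sketch of that pointer-counter pattern outside of `p4est` (the helper `bump!` is illustrative only):

```julia
counters = [0, 0, 0]  # interfaces, mortars, boundaries

function bump!(user_data::Ptr{Cvoid}, slot::Int)
    ptr = Ptr{Int}(user_data)
    unsafe_store!(ptr, unsafe_load(ptr, slot) + 1, slot)
    return nothing
end

GC.@preserve counters begin
    p = Ptr{Cvoid}(pointer(counters))
    bump!(p, 1)  # saw an interface
    bump!(p, 3)  # saw a boundary
end
@assert counters == [1, 0, 1]
```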
function count_surfaces_iter_face(info, user_data) - elem_count = unsafe_load(info).sides.elem_count - - if elem_count == 2 - # Two neighboring elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface - # Unpack user_data = [interface_count] and increment interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 1) - unsafe_store!(ptr, id + 1, 1) - else - # Hanging nodes => mortar - # Unpack user_data = [mortar_count] and increment mortar_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 2) - unsafe_store!(ptr, id + 1, 2) + elem_count = unsafe_load(info).sides.elem_count + + if elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface + # Unpack user_data = [interface_count] and increment interface_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 1) + unsafe_store!(ptr, id + 1, 1) + else + # Hanging nodes => mortar + # Unpack user_data = [mortar_count] and increment mortar_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 2) + unsafe_store!(ptr, id + 1, 2) + end + elseif elem_count == 1 + # One neighboring elements => boundary + + # Unpack user_data = [boundary_count] and increment boundary_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 3) + unsafe_store!(ptr, id + 1, 3) end - elseif elem_count == 1 - # One neighboring elements => boundary - - # Unpack user_data = [boundary_count] and increment boundary_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 3) - unsafe_store!(ptr, id + 1, 3) - end - return nothing + return nothing end # 2D -cfunction(::typeof(count_surfaces_iter_face), ::Val{2}) = @cfunction(count_surfaces_iter_face, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(count_surfaces_iter_face), ::Val{2}) + @cfunction(count_surfaces_iter_face, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(count_surfaces_iter_face), ::Val{3}) = @cfunction(count_surfaces_iter_face, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(count_surfaces_iter_face), ::Val{3}) + @cfunction(count_surfaces_iter_face, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end function count_required_surfaces(mesh::P4estMesh) - # Let `p4est` iterate over all interfaces and call count_surfaces_iter_face - iter_face_c = cfunction(count_surfaces_iter_face, Val(ndims(mesh))) + # Let `p4est` iterate over all interfaces and call count_surfaces_iter_face + iter_face_c = cfunction(count_surfaces_iter_face, Val(ndims(mesh))) - # interfaces, mortars, boundaries - user_data = [0, 0, 0] + # interfaces, mortars, boundaries + user_data = [0, 0, 0] - iterate_p4est(mesh.p4est, user_data; iter_face_c=iter_face_c) + iterate_p4est(mesh.p4est, user_data; iter_face_c = iter_face_c) - # Return counters - return (interfaces = user_data[1], - mortars = user_data[2], - boundaries = user_data[3]) + # Return counters + return (interfaces = user_data[1], + mortars = user_data[2], + boundaries = user_data[3]) end - # Return direction of the face, which is indexed by node_indices @inline function indices2direction(indices) - if indices[1] === :begin - return 1 - elseif indices[1] === :end - return 2 - elseif indices[2] 
=== :begin - return 3 - elseif indices[2] === :end - return 4 - elseif indices[3] === :begin - return 5 - else # if indices[3] === :end - return 6 - end + if indices[1] === :begin + return 1 + elseif indices[1] === :end + return 2 + elseif indices[2] === :begin + return 3 + elseif indices[2] === :end + return 4 + elseif indices[3] === :begin + return 5 + else # if indices[3] === :end + return 6 + end end - - include("containers_2d.jl") include("containers_3d.jl") include("containers_parallel.jl") include("containers_parallel_2d.jl") include("containers_parallel_3d.jl") - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_2d.jl b/src/solvers/dgsem_p4est/containers_2d.jl index cf18e433ff7..4f7d903897a 100644 --- a/src/solvers/dgsem_p4est/containers_2d.jl +++ b/src/solvers/dgsem_p4est/containers_2d.jl @@ -3,174 +3,170 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::P4estMesh{2}, basis::LobattoLegendreBasis) - @unpack node_coordinates, jacobian_matrix, - contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, jacobian_matrix, + contravariant_vectors, inverse_jacobian = elements - calc_node_coordinates!(node_coordinates, mesh, basis) + calc_node_coordinates!(node_coordinates, mesh, basis) - for element in 1:ncells(mesh) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + for element in 1:ncells(mesh) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) + end - return nothing + return nothing end - # Interpolate tree_node_coordinates to each quadrant at the nodes of the specified basis function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{2}, basis::LobattoLegendreBasis) - # Hanging nodes will cause holes in the mesh if its polydeg is higher - # than the polydeg of the solver. - @assert length(basis.nodes) >= length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" + # Hanging nodes will cause holes in the mesh if its polydeg is higher + # than the polydeg of the solver. + @assert length(basis.nodes)>=length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" - calc_node_coordinates!(node_coordinates, mesh, basis.nodes) + calc_node_coordinates!(node_coordinates, mesh, basis.nodes) end # Interpolate tree_node_coordinates to each quadrant at the specified nodes function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{2}, nodes::AbstractVector) - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. 
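As context for the scratch buffers allocated below: a minimal sketch of the `StrideArray` pattern, mirroring the call style used in this file (the element type and sizes here are made up for illustration; only StrideArrays.jl and Static.jl are assumed):

using StrideArrays: StrideArray
using Static: StaticInt

# Statically sized scratch buffer: 2 coordinates on a 4×4 node grid.
# `StaticInt` sizes are part of the type, so loops over this buffer
# can be specialized and unrolled by the compiler.
tmp = StrideArray(undef, Float64, StaticInt(2), StaticInt(4), StaticInt(4))
fill!(tmp, zero(eltype(tmp)))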
- tmp1 = StrideArray(undef, real(mesh), - StaticInt(2), static_length(nodes), static_length(mesh.nodes)) - matrix1 = StrideArray(undef, real(mesh), - static_length(nodes), static_length(mesh.nodes)) - matrix2 = similar(matrix1) - baryweights_in = barycentric_weights(mesh.nodes) - - # Macros from `p4est` - p4est_root_len = 1 << P4EST_MAXLEVEL - p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) - - trees = unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) - - for tree in eachindex(trees) - offset = trees[tree].quadrants_offset - quadrants = unsafe_wrap_sc(p4est_quadrant_t, trees[tree].quadrants) - - for i in eachindex(quadrants) - element = offset + i - quad = quadrants[i] - - quad_length = p4est_quadrant_len(quad.level) / p4est_root_len - - nodes_out_x = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.x / p4est_root_len) .- 1 - nodes_out_y = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.y / p4est_root_len) .- 1 - polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, baryweights_in) - polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, baryweights_in) - - multiply_dimensionwise!( - view(node_coordinates, :, :, :, element), - matrix1, matrix2, - view(mesh.tree_node_coordinates, :, :, :, tree), - tmp1 - ) + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + tmp1 = StrideArray(undef, real(mesh), + StaticInt(2), static_length(nodes), static_length(mesh.nodes)) + matrix1 = StrideArray(undef, real(mesh), + static_length(nodes), static_length(mesh.nodes)) + matrix2 = similar(matrix1) + baryweights_in = barycentric_weights(mesh.nodes) + + # Macros from `p4est` + p4est_root_len = 1 << P4EST_MAXLEVEL + p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) + + trees = unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) + + for tree in eachindex(trees) + offset = trees[tree].quadrants_offset + quadrants = unsafe_wrap_sc(p4est_quadrant_t, trees[tree].quadrants) + + for i in eachindex(quadrants) + element = offset + i + quad = quadrants[i] + + quad_length = p4est_quadrant_len(quad.level) / p4est_root_len + + nodes_out_x = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.x / p4est_root_len) .- 1 + nodes_out_y = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.y / p4est_root_len) .- 1 + polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, + baryweights_in) + polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, + baryweights_in) + + multiply_dimensionwise!(view(node_coordinates, :, :, :, element), + matrix1, matrix2, + view(mesh.tree_node_coordinates, :, :, :, tree), + tmp1) + end end - end - return node_coordinates + return node_coordinates end - # Initialize node_indices of interface container @inline function init_interface_node_indices!(interfaces::P4estInterfaceContainer{2}, faces, orientation, interface_id) - # Iterate over primary and secondary element - for side in 1:2 - # Align interface in positive coordinate direction of primary element. - # For orientation == 1, the secondary element needs to be indexed backwards - # relative to the interface. - if side == 1 || orientation == 0 - # Forward indexing - i = :i_forward - else - # Backward indexing - i = :i_backward + # Iterate over primary and secondary element + for side in 1:2 + # Align interface in positive coordinate direction of primary element. 
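To make the reference-coordinate mapping in `calc_node_coordinates!` concrete, here is a small worked example in plain Julia (the `P4EST_MAXLEVEL` value is inlined as an assumption from `p4est`'s 2D headers; the result is independent of it): a level-1 quadrant with `quad.x == 0` covers the left half of its tree, so solver nodes in [-1, 1] land in [-1, 0] of the tree's reference coordinates.

P4EST_MAXLEVEL = 30  # assumed 2D value; the mapping below works for any max level
p4est_root_len = 1 << P4EST_MAXLEVEL
p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l)

nodes = [-1.0, 0.0, 1.0]  # toy reference nodes in [-1, 1]
level, quad_x = 1, 0      # first child in the x-direction

quad_length = p4est_quadrant_len(level) / p4est_root_len  # 0.5
nodes_out_x = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ quad_x / p4est_root_len) .- 1
# nodes_out_x == [-1.0, -0.5, 0.0], i.e., the left half of the tree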
+ # For orientation == 1, the secondary element needs to be indexed backwards + # relative to the interface. + if side == 1 || orientation == 0 + # Forward indexing + i = :i_forward + else + # Backward indexing + i = :i_backward + end + + if faces[side] == 0 + # Index face in negative x-direction + interfaces.node_indices[side, interface_id] = (:begin, i) + elseif faces[side] == 1 + # Index face in positive x-direction + interfaces.node_indices[side, interface_id] = (:end, i) + elseif faces[side] == 2 + # Index face in negative y-direction + interfaces.node_indices[side, interface_id] = (i, :begin) + else # faces[side] == 3 + # Index face in positive y-direction + interfaces.node_indices[side, interface_id] = (i, :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, i) - elseif faces[side] == 1 - # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, i) - elseif faces[side] == 2 - # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (i, :begin) - else # faces[side] == 3 - # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (i, :end) - end - end - - return interfaces + return interfaces end - # Initialize node_indices of boundary container @inline function init_boundary_node_indices!(boundaries::P4estBoundaryContainer{2}, face, boundary_id) - if face == 0 - # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward) - elseif face == 1 - # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward) - elseif face == 2 - # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin) - else # face == 3 - # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end) - end - - return boundaries -end + if face == 0 + # Index face in negative x-direction + boundaries.node_indices[boundary_id] = (:begin, :i_forward) + elseif face == 1 + # Index face in positive x-direction + boundaries.node_indices[boundary_id] = (:end, :i_forward) + elseif face == 2 + # Index face in negative y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :begin) + else # face == 3 + # Index face in positive y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :end) + end + return boundaries +end # Initialize node_indices of mortar container # faces[1] is expected to be the face of the small side. @inline function init_mortar_node_indices!(mortars, faces, orientation, mortar_id) - for side in 1:2 - # Align mortar in positive coordinate direction of small side. - # For orientation == 1, the large side needs to be indexed backwards - # relative to the mortar. - if side == 1 || orientation == 0 - # Forward indexing for small side or orientation == 0 - i = :i_forward - else - # Backward indexing for large side with reversed orientation - i = :i_backward + for side in 1:2 + # Align mortar in positive coordinate direction of small side. + # For orientation == 1, the large side needs to be indexed backwards + # relative to the mortar. 
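The branches above all encode the same `p4est` face-numbering convention (0: -x, 1: +x, 2: -y, 3: +y). A hypothetical table-style condensation of the 2D boundary case, shown only to summarize that convention (this helper is not part of Trixi.jl):

# Hypothetical condensed form of init_boundary_node_indices! in 2D
function boundary_node_indices_2d(face)
    face == 0 && return (:begin, :i_forward)  # negative x-direction
    face == 1 && return (:end, :i_forward)    # positive x-direction
    face == 2 && return (:i_forward, :begin)  # negative y-direction
    return (:i_forward, :end)                 # face == 3, positive y-direction
end

boundary_node_indices_2d(2)  # (:i_forward, :begin)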
+ if side == 1 || orientation == 0 + # Forward indexing for small side or orientation == 0 + i = :i_forward + else + # Backward indexing for large side with reversed orientation + i = :i_backward + end + + if faces[side] == 0 + # Index face in negative x-direction + mortars.node_indices[side, mortar_id] = (:begin, i) + elseif faces[side] == 1 + # Index face in positive x-direction + mortars.node_indices[side, mortar_id] = (:end, i) + elseif faces[side] == 2 + # Index face in negative y-direction + mortars.node_indices[side, mortar_id] = (i, :begin) + else # faces[side] == 3 + # Index face in positive y-direction + mortars.node_indices[side, mortar_id] = (i, :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, i) - elseif faces[side] == 1 - # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, i) - elseif faces[side] == 2 - # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (i, :begin) - else # faces[side] == 3 - # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (i, :end) - end - end - - return mortars + return mortars end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_3d.jl b/src/solvers/dgsem_p4est/containers_3d.jl index f5bffece227..6cdc2cf9611 100644 --- a/src/solvers/dgsem_p4est/containers_3d.jl +++ b/src/solvers/dgsem_p4est/containers_3d.jl @@ -3,315 +3,327 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::P4estMesh{3}, basis::LobattoLegendreBasis) - @unpack node_coordinates, jacobian_matrix, - contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, jacobian_matrix, + contravariant_vectors, inverse_jacobian = elements - calc_node_coordinates!(node_coordinates, mesh, basis) + calc_node_coordinates!(node_coordinates, mesh, basis) - for element in 1:ncells(mesh) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + for element in 1:ncells(mesh) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, - node_coordinates, basis) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, + node_coordinates, basis) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) + end - return nothing + return nothing end - # Interpolate tree_node_coordinates to each quadrant at the nodes of the specified basis function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{3}, basis::LobattoLegendreBasis) - # Hanging nodes will cause holes in the mesh if its polydeg is higher - # than the polydeg of the solver. - @assert length(basis.nodes) >= length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" + # Hanging nodes will cause holes in the mesh if its polydeg is higher + # than the polydeg of the solver. 
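A quick sanity check on the metric terms produced by the `init_elements!` loop above (plain Julia, not Trixi.jl's internal storage layout): for an affine, cube-shaped element of edge length h, the reference mapping is x = x₀ + (h/2)(ξ + 1), so the Jacobian is diagonal and the stored inverse Jacobian reduces to 1/det(J).

using LinearAlgebra: Diagonal, det

h = 0.25                             # assumed edge length of an affine element
jacobian = Diagonal(fill(h / 2, 3))  # ∂x/∂ξ for x = x₀ + (h/2)(ξ + 1)
det(jacobian)                        # (h/2)^3 = 0.001953125
1 / det(jacobian)                    # 512.0, the inverse Jacobian at every node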
+ @assert length(basis.nodes)>=length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" - calc_node_coordinates!(node_coordinates, mesh, basis.nodes) + calc_node_coordinates!(node_coordinates, mesh, basis.nodes) end # Interpolate tree_node_coordinates to each quadrant at the specified nodes function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{3}, nodes::AbstractVector) - # Macros from `p4est` - p4est_root_len = 1 << P4EST_MAXLEVEL - p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) + # Macros from `p4est` + p4est_root_len = 1 << P4EST_MAXLEVEL + p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) - trees = unsafe_wrap_sc(p8est_tree_t, unsafe_load(mesh.p4est).trees) + trees = unsafe_wrap_sc(p8est_tree_t, unsafe_load(mesh.p4est).trees) - for tree in eachindex(trees) - offset = trees[tree].quadrants_offset - quadrants = unsafe_wrap_sc(p8est_quadrant_t, trees[tree].quadrants) + for tree in eachindex(trees) + offset = trees[tree].quadrants_offset + quadrants = unsafe_wrap_sc(p8est_quadrant_t, trees[tree].quadrants) - for i in eachindex(quadrants) - element = offset + i - quad = quadrants[i] + for i in eachindex(quadrants) + element = offset + i + quad = quadrants[i] - quad_length = p4est_quadrant_len(quad.level) / p4est_root_len + quad_length = p4est_quadrant_len(quad.level) / p4est_root_len - nodes_out_x = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.x / p4est_root_len) .- 1 - nodes_out_y = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.y / p4est_root_len) .- 1 - nodes_out_z = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.z / p4est_root_len) .- 1 + nodes_out_x = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.x / p4est_root_len) .- 1 + nodes_out_y = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.y / p4est_root_len) .- 1 + nodes_out_z = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.z / p4est_root_len) .- 1 - matrix1 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_x) - matrix2 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_y) - matrix3 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_z) + matrix1 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_x) + matrix2 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_y) + matrix3 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_z) - multiply_dimensionwise!( - view(node_coordinates, :, :, :, :, element), - matrix1, matrix2, matrix3, - view(mesh.tree_node_coordinates, :, :, :, :, tree) - ) + multiply_dimensionwise!(view(node_coordinates, :, :, :, :, element), + matrix1, matrix2, matrix3, + view(mesh.tree_node_coordinates, :, :, :, :, tree)) + end end - end - return node_coordinates + return node_coordinates end - # Initialize node_indices of interface container @inline function init_interface_node_indices!(interfaces::P4estInterfaceContainer{3}, faces, orientation, interface_id) - # Iterate over primary and secondary element - for side in 1:2 - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). - # The secondary element needs to be indexed differently. - if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) + # Iterate over primary and secondary element + for side in 1:2 + # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # The secondary element needs to be indexed differently. 
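The `polynomial_interpolation_matrix` calls above build operators mapping values at the mesh's nodes to the per-quadrant nodes. A self-contained, naive Lagrange-basis sketch of what such a matrix does (this helper is illustrative only; Trixi.jl's own implementation uses barycentric weights instead):

# V[i, j] = ℓ_j(nodes_out[i]), so V * f_at_nodes_in samples the interpolant at nodes_out
function lagrange_interpolation_matrix(nodes_in, nodes_out)
    return [prod((x - nodes_in[m]) / (nodes_in[j] - nodes_in[m])
                 for m in eachindex(nodes_in) if m != j)
            for x in nodes_out, j in eachindex(nodes_in)]
end

V = lagrange_interpolation_matrix([-1.0, 0.0, 1.0], [-0.5, 0.5])
V * [1.0, 0.0, 1.0]  # x^2 sampled at the input nodes -> [0.25, 0.25] at ±0.5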
+ if side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end + + if faces[side] == 0 + # Index face in negative x-direction + interfaces.node_indices[side, interface_id] = (:begin, surface_index1, + surface_index2) + elseif faces[side] == 1 + # Index face in positive x-direction + interfaces.node_indices[side, interface_id] = (:end, surface_index1, + surface_index2) + elseif faces[side] == 2 + # Index face in negative y-direction + interfaces.node_indices[side, interface_id] = (surface_index1, :begin, + surface_index2) + elseif faces[side] == 3 + # Index face in positive y-direction + interfaces.node_indices[side, interface_id] = (surface_index1, :end, + surface_index2) + elseif faces[side] == 4 + # Index face in negative z-direction + interfaces.node_indices[side, interface_id] = (surface_index1, + surface_index2, :begin) + else # faces[side] == 5 + # Index face in positive z-direction + interfaces.node_indices[side, interface_id] = (surface_index1, + surface_index2, :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, surface_index1, surface_index2) - elseif faces[side] == 1 - # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, surface_index1, surface_index2) - elseif faces[side] == 2 - # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :begin, surface_index2) - elseif faces[side] == 3 - # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :end, surface_index2) - elseif faces[side] == 4 - # Index face in negative z-direction - interfaces.node_indices[side, interface_id] = (surface_index1, surface_index2, :begin) - else # faces[side] == 5 - # Index face in positive z-direction - interfaces.node_indices[side, interface_id] = (surface_index1, surface_index2, :end) - end - end - - return interfaces + return interfaces end - # Initialize node_indices of boundary container @inline function init_boundary_node_indices!(boundaries::P4estBoundaryContainer{3}, face, boundary_id) - if face == 0 - # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward, :j_forward) - elseif face == 1 - # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward, :j_forward) - elseif face == 2 - # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin, :j_forward) - elseif face == 3 - # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end, :j_forward) - elseif face == 4 - # Index face in negative z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :begin) - else # face == 5 - # Index face in positive z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :end) - end + if face == 0 + # Index face in negative x-direction + boundaries.node_indices[boundary_id] = (:begin, :i_forward, :j_forward) + elseif face == 1 + # Index face in positive x-direction + boundaries.node_indices[boundary_id] = (:end, :i_forward, :j_forward) + elseif face == 2 + # Index face in negative y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :begin, :j_forward) + elseif face == 3 + # Index face in positive y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :end, 
:j_forward) + elseif face == 4 + # Index face in negative z-direction + boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :begin) + else # face == 5 + # Index face in positive z-direction + boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :end) + end - return boundaries + return boundaries end - # Initialize node_indices of mortar container # faces[1] is expected to be the face of the small side. @inline function init_mortar_node_indices!(mortars::P4estMortarContainer{3}, faces, orientation, mortar_id) - for side in 1:2 - # Align mortar at small side. - # The large side needs to be indexed differently. - if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) + for side in 1:2 + # Align mortar at small side. + # The large side needs to be indexed differently. + if side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end + + if faces[side] == 0 + # Index face in negative x-direction + mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + surface_index2) + elseif faces[side] == 1 + # Index face in positive x-direction + mortars.node_indices[side, mortar_id] = (:end, surface_index1, + surface_index2) + elseif faces[side] == 2 + # Index face in negative y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + surface_index2) + elseif faces[side] == 3 + # Index face in positive y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :end, + surface_index2) + elseif faces[side] == 4 + # Index face in negative z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :begin) + else # faces[side] == 5 + # Index face in positive z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, surface_index2) - elseif faces[side] == 1 - # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, surface_index1, surface_index2) - elseif faces[side] == 2 - # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, surface_index2) - elseif faces[side] == 3 - # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, surface_index2) - elseif faces[side] == 4 - # Index face in negative z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :begin) - else # faces[side] == 5 - # Index face in positive z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :end) - end - end - - return mortars + return mortars end - # Convert `p4est` orientation code to node indices. # Return node indices that index "my side" wrt "other side", # i.e., i and j are indices of other side. function orientation_to_indices_p4est(my_face, other_face, orientation_code) - # my_face and other_face are the face directions (zero-based) - # of "my side" and "other side" respectively. - # Face corner 0 of the face with the lower face direction connects to a corner of the other face. - # The number of this corner is the orientation code in `p4est`. 
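Two tentative sanity examples for this function, obtained by tracing the branches that follow (the face pairs are arbitrary picks; faces are numbered 0: -x, 1: +x, 2: -y, 3: +y, 4: -z, 5: +z):

# Faces 0 (-x) and 1 (+x) have opposite handedness seen from outside,
# so the pair is not flipped and orientation code 0 keeps both indices forward:
orientation_to_indices_p4est(0, 1, 0)  # (:i_forward, :j_forward)

# Faces 0 (-x) and 4 (-z) are both left-handed seen from outside,
# so the pair is flipped and orientation code 0 swaps the index roles:
orientation_to_indices_p4est(0, 4, 0)  # (:j_forward, :i_forward)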
- lower = my_face <= other_face - - # x_pos, y_neg, and z_pos are the directions in which the face has right-handed coordinates - # when looked at from the outside. - my_right_handed = my_face in (1, 2, 5) - other_right_handed = other_face in (1, 2, 5) - - # If both or none are right-handed when looked at from the outside, they will have different - # orientations when looked at from the same side of the interface. - flipped = my_right_handed == other_right_handed - - # In the following illustrations, the face corner numbering of `p4est` is shown. - # ξ and η are the local coordinates of the respective face. - # We're looking at both faces from the same side of the interface, so that "other side" - # (in the illustrations on the left) has right-handed coordinates. - if !flipped - if orientation_code == 0 - # Corner 0 of other side matches corner 0 of my side - # 2┌──────┐3 2┌──────┐3 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 0└──────┘1 - # η η - # ↑ ↑ - # │ │ - # └───> ξ └───> ξ - surface_index1 = :i_forward - surface_index2 = :j_forward - elseif ((lower && orientation_code == 2) # Corner 0 of my side matches corner 2 of other side - || (!lower && orientation_code == 1)) # Corner 0 of other side matches corner 1 of my side - # 2┌──────┐3 0┌──────┐2 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 1└──────┘3 - # η ┌───> η - # ↑ │ - # │ ↓ - # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_forward - elseif ((lower && orientation_code == 1) # Corner 0 of my side matches corner 1 of other side - || (!lower && orientation_code == 2)) # Corner 0 of other side matches corner 2 of my side - # 2┌──────┐3 3┌──────┐1 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 2└──────┘0 - # η ξ - # ↑ ↑ - # │ │ - # └───> ξ η <───┘ - surface_index1 = :j_forward - surface_index2 = :i_backward - else # orientation_code == 3 - # Corner 0 of my side matches corner 3 of other side and - # corner 0 of other side matches corner 3 of my side. - # 2┌──────┐3 1┌──────┐0 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 3└──────┘2 - # η ξ <───┐ - # ↑ │ - # │ ↓ - # └───> ξ η - surface_index1 = :i_backward - surface_index2 = :j_backward - end - else # flipped - if orientation_code == 0 - # Corner 0 of other side matches corner 0 of my side - # 2┌──────┐3 1┌──────┐3 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 0└──────┘2 - # η ξ - # ↑ ↑ - # │ │ - # └───> ξ └───> η - surface_index1 = :j_forward - surface_index2 = :i_forward - elseif orientation_code == 2 - # Corner 0 of my side matches corner 2 of other side and - # corner 0 of other side matches corner 2 of my side. - # 2┌──────┐3 0┌──────┐1 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 2└──────┘3 - # η ┌───> ξ - # ↑ │ - # │ ↓ - # └───> ξ η - surface_index1 = :i_forward - surface_index2 = :j_backward - elseif orientation_code == 1 - # Corner 0 of my side matches corner 1 of other side and - # corner 0 of other side matches corner 1 of my side. - # 2┌──────┐3 3┌──────┐2 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 1└──────┘0 - # η η - # ↑ ↑ - # │ │ - # └───> ξ ξ <───┘ - surface_index1 = :i_backward - surface_index2 = :j_forward - else # orientation_code == 3 - # Corner 0 of my side matches corner 3 of other side and - # corner 0 of other side matches corner 3 of my side. - # 2┌──────┐3 2┌──────┐0 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 3└──────┘1 - # η η <───┐ - # ↑ │ - # │ ↓ - # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_backward + # my_face and other_face are the face directions (zero-based) + # of "my side" and "other side" respectively. 
+ # Face corner 0 of the face with the lower face direction connects to a corner of the other face. + # The number of this corner is the orientation code in `p4est`. + lower = my_face <= other_face + + # x_pos, y_neg, and z_pos are the directions in which the face has right-handed coordinates + # when looked at from the outside. + my_right_handed = my_face in (1, 2, 5) + other_right_handed = other_face in (1, 2, 5) + + # If both or none are right-handed when looked at from the outside, they will have different + # orientations when looked at from the same side of the interface. + flipped = my_right_handed == other_right_handed + + # In the following illustrations, the face corner numbering of `p4est` is shown. + # ξ and η are the local coordinates of the respective face. + # We're looking at both faces from the same side of the interface, so that "other side" + # (in the illustrations on the left) has right-handed coordinates. + if !flipped + if orientation_code == 0 + # Corner 0 of other side matches corner 0 of my side + # 2┌──────┐3 2┌──────┐3 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 0└──────┘1 + # η η + # ↑ ↑ + # │ │ + # └───> ξ └───> ξ + surface_index1 = :i_forward + surface_index2 = :j_forward + elseif ((lower && orientation_code == 2) # Corner 0 of my side matches corner 2 of other side + || + (!lower && orientation_code == 1)) # Corner 0 of other side matches corner 1 of my side + # 2┌──────┐3 0┌──────┐2 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 1└──────┘3 + # η ┌───> η + # ↑ │ + # │ ↓ + # └───> ξ ξ + surface_index1 = :j_backward + surface_index2 = :i_forward + elseif ((lower && orientation_code == 1) # Corner 0 of my side matches corner 1 of other side + || + (!lower && orientation_code == 2)) # Corner 0 of other side matches corner 2 of my side + # 2┌──────┐3 3┌──────┐1 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 2└──────┘0 + # η ξ + # ↑ ↑ + # │ │ + # └───> ξ η <───┘ + surface_index1 = :j_forward + surface_index2 = :i_backward + else # orientation_code == 3 + # Corner 0 of my side matches corner 3 of other side and + # corner 0 of other side matches corner 3 of my side. + # 2┌──────┐3 1┌──────┐0 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 3└──────┘2 + # η ξ <───┐ + # ↑ │ + # │ ↓ + # └───> ξ η + surface_index1 = :i_backward + surface_index2 = :j_backward + end + else # flipped + if orientation_code == 0 + # Corner 0 of other side matches corner 0 of my side + # 2┌──────┐3 1┌──────┐3 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 0└──────┘2 + # η ξ + # ↑ ↑ + # │ │ + # └───> ξ └───> η + surface_index1 = :j_forward + surface_index2 = :i_forward + elseif orientation_code == 2 + # Corner 0 of my side matches corner 2 of other side and + # corner 0 of other side matches corner 2 of my side. + # 2┌──────┐3 0┌──────┐1 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 2└──────┘3 + # η ┌───> ξ + # ↑ │ + # │ ↓ + # └───> ξ η + surface_index1 = :i_forward + surface_index2 = :j_backward + elseif orientation_code == 1 + # Corner 0 of my side matches corner 1 of other side and + # corner 0 of other side matches corner 1 of my side. + # 2┌──────┐3 3┌──────┐2 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 1└──────┘0 + # η η + # ↑ ↑ + # │ │ + # └───> ξ ξ <───┘ + surface_index1 = :i_backward + surface_index2 = :j_forward + else # orientation_code == 3 + # Corner 0 of my side matches corner 3 of other side and + # corner 0 of other side matches corner 3 of my side. 
+ # 2┌──────┐3 2┌──────┐0 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 3└──────┘1 + # η η <───┐ + # ↑ │ + # │ ↓ + # └───> ξ ξ + surface_index1 = :j_backward + surface_index2 = :i_backward + end end - end - return surface_index1, surface_index2 + return surface_index1, surface_index2 end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index d8283594a22..42d6ea44c5e 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -3,438 +3,479 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: + AbstractContainer + u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::Vector{Int} # [interface] + node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] + local_sides::Vector{Int} # [interface] -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype<:Real, NDIMSP2} <: AbstractContainer - u ::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices ::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides ::Vector{Int} # [interface] - - # internal `resize!`able storage - _u ::Vector{uEltype} + # internal `resize!`able storage + _u::Vector{uEltype} end -@inline nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) = length(interfaces.local_sides) -@inline Base.ndims(::P4estMPIInterfaceContainer{NDIMS}) where NDIMS = NDIMS +@inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) + length(interfaces.local_sides) +end +@inline Base.ndims(::P4estMPIInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) - @unpack _u, local_neighbor_ids, node_indices, local_sides = mpi_interfaces + @unpack _u, local_neighbor_ids, node_indices, local_sides = mpi_interfaces - n_dims = ndims(mpi_interfaces) - n_nodes = size(mpi_interfaces.u, 3) - n_variables = size(mpi_interfaces.u, 2) + n_dims = ndims(mpi_interfaces) + n_nodes = size(mpi_interfaces.u, 3) + n_variables = size(mpi_interfaces.u, 2) - resize!(_u, 2 * n_variables * n_nodes^(n_dims-1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) + mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., + capacity)) - resize!(local_neighbor_ids, capacity) + resize!(local_neighbor_ids, capacity) - resize!(node_indices, capacity) + resize!(node_indices, capacity) - resize!(local_sides, capacity) + resize!(local_sides, capacity) - return nothing + return nothing end - # Create MPI interface container and initialize interface data function init_mpi_interfaces(mesh::ParallelP4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) + NDIMS = ndims(elements) + uEltype = eltype(elements) - # Initialize container - n_mpi_interfaces = count_required_surfaces(mesh).mpi_interfaces + # Initialize container + n_mpi_interfaces = count_required_surfaces(mesh).mpi_interfaces - _u = Vector{uEltype}(undef, 2 * nvariables(equations) * nnodes(basis)^(NDIMS-1) * n_mpi_interfaces) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), ntuple(_ -> 
nnodes(basis), NDIMS-1)..., n_mpi_interfaces)) + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * nnodes(basis)^(NDIMS - 1) * + n_mpi_interfaces) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., + n_mpi_interfaces)) - local_neighbor_ids = Vector{Int}(undef, n_mpi_interfaces) + local_neighbor_ids = Vector{Int}(undef, n_mpi_interfaces) - node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_mpi_interfaces) + node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_mpi_interfaces) - local_sides = Vector{Int}(undef, n_mpi_interfaces) + local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS+2}( - u, local_neighbor_ids, node_indices, local_sides, _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) - init_mpi_interfaces!(mpi_interfaces, mesh) + init_mpi_interfaces!(mpi_interfaces, mesh) - return mpi_interfaces + return mpi_interfaces end function init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) - init_surfaces!(nothing, nothing, nothing, mpi_interfaces, nothing, mesh) + init_surfaces!(nothing, nothing, nothing, mpi_interfaces, nothing, mesh) - return mpi_interfaces + return mpi_interfaces end - # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into # `local_neighbor_ids` and `local_neighbor_positions` to describe the ids and positions of the locally # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. -mutable struct P4estMPIMortarContainer{NDIMS, uEltype<:Real, RealT<:Real, NDIMSP1, NDIMSP2, NDIMSP3} <: AbstractContainer - u ::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids ::Vector{Vector{Int}} # [mortar] - local_neighbor_positions::Vector{Vector{Int}} # [mortar] - node_indices ::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions ::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] - # internal `resize!`able storage - _u ::Vector{uEltype} - _node_indices ::Vector{NTuple{NDIMS, Symbol}} - _normal_directions ::Vector{RealT} +mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, + NDIMSP2, NDIMSP3} <: AbstractContainer + u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar] + local_neighbor_positions::Vector{Vector{Int}} # [mortar] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_indices::Vector{NTuple{NDIMS, Symbol}} + _normal_directions::Vector{RealT} end -@inline nmpimortars(mpi_mortars::P4estMPIMortarContainer) = length(mpi_mortars.local_neighbor_ids) -@inline Base.ndims(::P4estMPIMortarContainer{NDIMS}) where NDIMS = NDIMS +@inline function nmpimortars(mpi_mortars::P4estMPIMortarContainer) + length(mpi_mortars.local_neighbor_ids) +end +@inline Base.ndims(::P4estMPIMortarContainer{NDIMS}) where {NDIMS} = NDIMS function Base.resize!(mpi_mortars::P4estMPIMortarContainer, capacity) - @unpack _u, _node_indices, _normal_directions = mpi_mortars + @unpack _u, _node_indices, _normal_directions = 
mpi_mortars - n_dims = ndims(mpi_mortars) - n_nodes = size(mpi_mortars.u, 4) - n_variables = size(mpi_mortars.u, 2) + n_dims = ndims(mpi_mortars) + n_nodes = size(mpi_mortars.u, 4) + n_variables = size(mpi_mortars.u, 2) - resize!(_u, 2 * n_variables * 2^(n_dims-1) * n_nodes^(n_dims-1) * capacity) - mpi_mortars.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, 2^(n_dims-1), ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) + mpi_mortars.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, 2^(n_dims - 1), + ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) - resize!(mpi_mortars.local_neighbor_ids, capacity) - resize!(mpi_mortars.local_neighbor_positions, capacity) + resize!(mpi_mortars.local_neighbor_ids, capacity) + resize!(mpi_mortars.local_neighbor_positions, capacity) - resize!(_node_indices, 2 * capacity) - mpi_mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + resize!(_node_indices, 2 * capacity) + mpi_mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) - resize!(_normal_directions, n_dims * n_nodes^(n_dims-1) * 2^(n_dims-1) * capacity) - mpi_mortars.normal_directions = unsafe_wrap(Array, pointer(_normal_directions), - (n_dims, ntuple(_ -> n_nodes, n_dims-1)..., 2^(n_dims-1), capacity)) + resize!(_normal_directions, + n_dims * n_nodes^(n_dims - 1) * 2^(n_dims - 1) * capacity) + mpi_mortars.normal_directions = unsafe_wrap(Array, pointer(_normal_directions), + (n_dims, + ntuple(_ -> n_nodes, n_dims - 1)..., + 2^(n_dims - 1), capacity)) - return nothing + return nothing end - # Create MPI mortar container and initialize MPI mortar data function init_mpi_mortars(mesh::ParallelP4estMesh, equations, basis, elements) - NDIMS = ndims(mesh) - RealT = real(mesh) - uEltype = eltype(elements) - - # Initialize container - n_mpi_mortars = count_required_surfaces(mesh).mpi_mortars - - _u = Vector{uEltype}(undef, - 2 * nvariables(equations) * 2^(NDIMS-1) * nnodes(basis)^(NDIMS-1) * n_mpi_mortars) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), 2^(NDIMS-1), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_mpi_mortars)) - - local_neighbor_ids = fill(Vector{Int}(), n_mpi_mortars) - local_neighbor_positions = fill(Vector{Int}(), n_mpi_mortars) - - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mpi_mortars) - node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mpi_mortars)) - - _normal_directions = Vector{RealT}(undef, NDIMS * nnodes(basis)^(NDIMS-1) * 2^(NDIMS-1) * n_mpi_mortars) - normal_directions = unsafe_wrap(Array, pointer(_normal_directions), - (NDIMS, ntuple(_ -> nnodes(basis), NDIMS-1)..., 2^(NDIMS-1), n_mpi_mortars)) - - mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS+1, NDIMS+2, NDIMS+3}( - u, local_neighbor_ids, local_neighbor_positions, node_indices, normal_directions, - _u, _node_indices, _normal_directions) - - if n_mpi_mortars > 0 - init_mpi_mortars!(mpi_mortars, mesh, basis, elements) - end + NDIMS = ndims(mesh) + RealT = real(mesh) + uEltype = eltype(elements) + + # Initialize container + n_mpi_mortars = count_required_surfaces(mesh).mpi_mortars + + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * 2^(NDIMS - 1) * + nnodes(basis)^(NDIMS - 1) * n_mpi_mortars) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), 2^(NDIMS - 1), + ntuple(_ -> nnodes(basis), NDIMS - 1)..., n_mpi_mortars)) + + local_neighbor_ids = fill(Vector{Int}(), n_mpi_mortars) + local_neighbor_positions = 
fill(Vector{Int}(), n_mpi_mortars) + + _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mpi_mortars) + node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mpi_mortars)) + + _normal_directions = Vector{RealT}(undef, + NDIMS * nnodes(basis)^(NDIMS - 1) * + 2^(NDIMS - 1) * n_mpi_mortars) + normal_directions = unsafe_wrap(Array, pointer(_normal_directions), + (NDIMS, ntuple(_ -> nnodes(basis), NDIMS - 1)..., + 2^(NDIMS - 1), n_mpi_mortars)) + + mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, + NDIMS + 3}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) + + if n_mpi_mortars > 0 + init_mpi_mortars!(mpi_mortars, mesh, basis, elements) + end - return mpi_mortars + return mpi_mortars end function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements) - init_surfaces!(nothing, nothing, nothing, nothing, mpi_mortars, mesh) - init_normal_directions!(mpi_mortars, basis, elements) + init_surfaces!(nothing, nothing, nothing, nothing, mpi_mortars, mesh) + init_normal_directions!(mpi_mortars, basis, elements) - return mpi_mortars + return mpi_mortars end - # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! function for parallel p4est meshes function init_interfaces!(interfaces, mesh::ParallelP4estMesh) - init_surfaces!(interfaces, nothing, nothing, nothing, nothing, mesh) + init_surfaces!(interfaces, nothing, nothing, nothing, nothing, mesh) - return interfaces + return interfaces end function init_mortars!(mortars, mesh::ParallelP4estMesh) - init_surfaces!(nothing, mortars, nothing, nothing, nothing, mesh) + init_surfaces!(nothing, mortars, nothing, nothing, nothing, mesh) - return mortars + return mortars end function init_boundaries!(boundaries, mesh::ParallelP4estMesh) - init_surfaces!(nothing, nothing, boundaries, nothing, nothing, mesh) + init_surfaces!(nothing, nothing, boundaries, nothing, nothing, mesh) - return boundaries + return boundaries end - function reinitialize_containers!(mesh::ParallelP4estMesh, equations, dg::DGSEM, cache) - # Make sure to re-create ghost layer before reinitializing MPI-related containers - update_ghost_layer!(mesh) + # Make sure to re-create ghost layer before reinitializing MPI-related containers + update_ghost_layer!(mesh) - # Re-initialize elements container - @unpack elements = cache - resize!(elements, ncells(mesh)) - init_elements!(elements, mesh, dg.basis) + # Re-initialize elements container + @unpack elements = cache + resize!(elements, ncells(mesh)) + init_elements!(elements, mesh, dg.basis) - required = count_required_surfaces(mesh) + required = count_required_surfaces(mesh) - # resize interfaces container - @unpack interfaces = cache - resize!(interfaces, required.interfaces) + # resize interfaces container + @unpack interfaces = cache + resize!(interfaces, required.interfaces) - # resize boundaries container - @unpack boundaries = cache - resize!(boundaries, required.boundaries) + # resize boundaries container + @unpack boundaries = cache + resize!(boundaries, required.boundaries) - # resize mortars container - @unpack mortars = cache - resize!(mortars, required.mortars) + # resize mortars container + @unpack mortars = cache + resize!(mortars, required.mortars) - # resize mpi_interfaces container - @unpack mpi_interfaces = cache - resize!(mpi_interfaces, required.mpi_interfaces) + # resize mpi_interfaces container + 
@unpack mpi_interfaces = cache + resize!(mpi_interfaces, required.mpi_interfaces) - # resize mpi_mortars container - @unpack mpi_mortars = cache - resize!(mpi_mortars, required.mpi_mortars) + # resize mpi_mortars container + @unpack mpi_mortars = cache + resize!(mpi_mortars, required.mpi_mortars) - # re-initialize containers together to reduce - # the number of iterations over the mesh in p4est - init_surfaces!(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh) + # re-initialize containers together to reduce + # the number of iterations over the mesh in p4est + init_surfaces!(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh) - # re-initialize MPI cache - @unpack mpi_cache = cache - init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, - nvariables(equations), nnodes(dg), eltype(elements)) + # re-initialize MPI cache + @unpack mpi_cache = cache + init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, + nvariables(equations), nnodes(dg), eltype(elements)) - # re-initialize and distribute normal directions of MPI mortars; requires MPI communication, so - # the MPI cache must be re-initialized before - init_normal_directions!(mpi_mortars, dg.basis, elements) - exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) + # re-initialize and distribute normal directions of MPI mortars; requires MPI communication, so + # the MPI cache must be re-initialized before + init_normal_directions!(mpi_mortars, dg.basis, elements) + exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) end - # A helper struct used in initialization methods below -mutable struct ParallelInitSurfacesIterFaceUserData{Interfaces, Mortars, Boundaries, MPIInterfaces, MPIMortars, Mesh} - interfaces ::Interfaces - interface_id ::Int - mortars ::Mortars - mortar_id ::Int - boundaries ::Boundaries - boundary_id ::Int - mpi_interfaces ::MPIInterfaces - mpi_interface_id::Int - mpi_mortars ::MPIMortars - mpi_mortar_id ::Int - mesh ::Mesh +mutable struct ParallelInitSurfacesIterFaceUserData{Interfaces, Mortars, Boundaries, + MPIInterfaces, MPIMortars, Mesh} + interfaces::Interfaces + interface_id::Int + mortars::Mortars + mortar_id::Int + boundaries::Boundaries + boundary_id::Int + mpi_interfaces::MPIInterfaces + mpi_interface_id::Int + mpi_mortars::MPIMortars + mpi_mortar_id::Int + mesh::Mesh end function ParallelInitSurfacesIterFaceUserData(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh) - return ParallelInitSurfacesIterFaceUserData{ - typeof(interfaces), typeof(mortars), typeof(boundaries), typeof(mpi_interfaces), typeof(mpi_mortars), typeof(mesh)}( - interfaces, 1, mortars, 1, boundaries, 1, mpi_interfaces, 1, mpi_mortars, 1, mesh) + return ParallelInitSurfacesIterFaceUserData{ + typeof(interfaces), typeof(mortars), + typeof(boundaries), + typeof(mpi_interfaces), + typeof(mpi_mortars), typeof(mesh)}(interfaces, + 1, + mortars, + 1, + boundaries, + 1, + mpi_interfaces, + 1, + mpi_mortars, + 1, + mesh) end - function init_surfaces_iter_face_parallel(info, user_data) - # Unpack user_data - data = unsafe_pointer_to_objref(Ptr{ParallelInitSurfacesIterFaceUserData}(user_data)) + # Unpack user_data + data = unsafe_pointer_to_objref(Ptr{ParallelInitSurfacesIterFaceUserData}(user_data)) - # Function barrier because the unpacked user_data above is type-unstable - init_surfaces_iter_face_inner(info, data) + # Function barrier because the unpacked user_data above is type-unstable + init_surfaces_iter_face_inner(info, data) end # 2D 
-cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{2}) = @cfunction(init_surfaces_iter_face_parallel, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{2}) + @cfunction(init_surfaces_iter_face_parallel, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{3}) = @cfunction(init_surfaces_iter_face_parallel, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{3}) + @cfunction(init_surfaces_iter_face_parallel, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end # Function barrier for type stability, overload for parallel P4estMesh -function init_surfaces_iter_face_inner(info, user_data::ParallelInitSurfacesIterFaceUserData) - @unpack interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars = user_data - # This function is called during `init_surfaces!`, more precisely it is called for each face - # while p4est iterates over the forest. Since `init_surfaces!` can be used to initialize all - # surfaces at once or any subset of them, some of the unpacked values above may be `nothing` if - # they're not supposed to be initialized during this call. That is why we need additional - # `!== nothing` checks below before initializing individual faces. - if unsafe_load(info).sides.elem_count == 2 - # Two neighboring elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface or MPI interface - if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface - if mpi_interfaces !== nothing - init_mpi_interfaces_iter_face_inner(info, sides, user_data) - end - else - if interfaces !== nothing - init_interfaces_iter_face_inner(info, sides, user_data) - end - end - else - # Hanging nodes => mortar or MPI mortar - # First, we check which side is hanging, i.e., on which side we have the refined cells. - # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they - # belong to another rank. That way we can determine if this is a regular mortar or MPI mortar - if sides[1].is_hanging == true - @assert sides[2].is_hanging == false - if any(sides[1].is.hanging.is_ghost .== true) || sides[2].is.full.is_ghost == true - face_has_ghost_side = true +function init_surfaces_iter_face_inner(info, + user_data::ParallelInitSurfacesIterFaceUserData) + @unpack interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars = user_data + # This function is called during `init_surfaces!`, more precisely it is called for each face + # while p4est iterates over the forest. Since `init_surfaces!` can be used to initialize all + # surfaces at once or any subset of them, some of the unpacked values above may be `nothing` if + # they're not supposed to be initialized during this call. That is why we need additional + # `!== nothing` checks below before initializing individual faces. 
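The `unsafe_pointer_to_objref` round trip plus function barrier used here is a general Julia pattern for C-style callbacks; a minimal self-contained sketch with a hypothetical `Counter` type (not Trixi.jl API):

mutable struct Counter
    hits::Int
end

# Function barrier: `unsafe_pointer_to_objref` returns an object typed `Any`,
# but inside this method `data` is concretely typed, so the body compiles well.
count_inner!(data::Counter) = (data.hits += 1; nothing)

function count_callback(user_data::Ptr{Cvoid})
    data = unsafe_pointer_to_objref(Ptr{Counter}(user_data))
    count_inner!(data)  # dynamic dispatch here acts as the barrier
end

counter = Counter(0)
GC.@preserve counter count_callback(pointer_from_objref(counter))
counter.hits  # 1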
+ if unsafe_load(info).sides.elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface or MPI interface + if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface + if mpi_interfaces !== nothing + init_mpi_interfaces_iter_face_inner(info, sides, user_data) + end + else + if interfaces !== nothing + init_interfaces_iter_face_inner(info, sides, user_data) + end + end else - face_has_ghost_side = false + # Hanging nodes => mortar or MPI mortar + # First, we check which side is hanging, i.e., on which side we have the refined cells. + # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they + # belong to another rank. That way we can determine if this is a regular mortar or MPI mortar + if sides[1].is_hanging == true + @assert sides[2].is_hanging == false + if any(sides[1].is.hanging.is_ghost .== true) || + sides[2].is.full.is_ghost == true + face_has_ghost_side = true + else + face_has_ghost_side = false + end + else # sides[2].is_hanging == true + @assert sides[1].is_hanging == false + if sides[1].is.full.is_ghost == true || + any(sides[2].is.hanging.is_ghost .== true) + face_has_ghost_side = true + else + face_has_ghost_side = false + end + end + # Initialize mortar or MPI mortar + if face_has_ghost_side && mpi_mortars !== nothing + init_mpi_mortars_iter_face_inner(info, sides, user_data) + elseif !face_has_ghost_side && mortars !== nothing + init_mortars_iter_face_inner(info, sides, user_data) + end end - else # sides[2].is_hanging == true - @assert sides[1].is_hanging == false - if sides[1].is.full.is_ghost == true || any(sides[2].is.hanging.is_ghost .== true) - face_has_ghost_side = true - else - face_has_ghost_side = false + elseif unsafe_load(info).sides.elem_count == 1 + # One neighboring element => boundary + if boundaries !== nothing + init_boundaries_iter_face_inner(info, user_data) + end end - end - # Initialize mortar or MPI mortar - if face_has_ghost_side && mpi_mortars !== nothing - init_mpi_mortars_iter_face_inner(info, sides, user_data) - elseif !face_has_ghost_side && mortars !== nothing - init_mortars_iter_face_inner(info, sides, user_data) - end - end - elseif unsafe_load(info).sides.elem_count == 1 - # One neighboring elements => boundary - if boundaries !== nothing - init_boundaries_iter_face_inner(info, user_data) - end - end - return nothing + return nothing end function init_surfaces!(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh::ParallelP4estMesh) - # Let p4est iterate over all interfaces and call init_surfaces_iter_face - iter_face_c = cfunction(init_surfaces_iter_face_parallel, Val(ndims(mesh))) - user_data = ParallelInitSurfacesIterFaceUserData(interfaces, mortars, boundaries, - mpi_interfaces, mpi_mortars, mesh) + # Let p4est iterate over all interfaces and call init_surfaces_iter_face + iter_face_c = cfunction(init_surfaces_iter_face_parallel, Val(ndims(mesh))) + user_data = ParallelInitSurfacesIterFaceUserData(interfaces, mortars, boundaries, + mpi_interfaces, mpi_mortars, mesh) - iterate_p4est(mesh.p4est, user_data; ghost_layer=mesh.ghost, iter_face_c=iter_face_c) + iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost, + iter_face_c = iter_face_c) - return nothing + return nothing end - # Initialization of MPI interfaces after the function barrier
function init_mpi_interfaces_iter_face_inner(info, sides, user_data) - @unpack mpi_interfaces, mpi_interface_id, mesh = user_data - user_data.mpi_interface_id += 1 - - if sides[1].is.full.is_ghost == true - local_side = 2 - elseif sides[2].is.full.is_ghost == true - local_side = 1 - else - error("should not happen") - end - - # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) - # Quadrant numbering offset of the local quadrant at this interface - offset = tree.quadrants_offset - tree_quad_id = sides[local_side].is.full.quadid # quadid in the local tree - # ID of the local neighboring quad, cumulative over local trees - local_quad_id = offset + tree_quad_id - - # p4est uses zero-based indexing, convert to one-based indexing - mpi_interfaces.local_neighbor_ids[mpi_interface_id] = local_quad_id + 1 - mpi_interfaces.local_sides[mpi_interface_id] = local_side - - # Face at which the interface lies - faces = (sides[1].face, sides[2].face) - - # Save mpi_interfaces.node_indices dimension specific in containers_[23]d_parallel.jl - init_mpi_interface_node_indices!(mpi_interfaces, faces, local_side, - unsafe_load(info).orientation, - mpi_interface_id) - - return nothing -end + @unpack mpi_interfaces, mpi_interface_id, mesh = user_data + user_data.mpi_interface_id += 1 + + if sides[1].is.full.is_ghost == true + local_side = 2 + elseif sides[2].is.full.is_ghost == true + local_side = 1 + else + error("should not happen") + end + + # Get local tree, one-based indexing + tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) + # Quadrant numbering offset of the local quadrant at this interface + offset = tree.quadrants_offset + tree_quad_id = sides[local_side].is.full.quadid # quadid in the local tree + # ID of the local neighboring quad, cumulative over local trees + local_quad_id = offset + tree_quad_id + + # p4est uses zero-based indexing, convert to one-based indexing + mpi_interfaces.local_neighbor_ids[mpi_interface_id] = local_quad_id + 1 + mpi_interfaces.local_sides[mpi_interface_id] = local_side + + # Face at which the interface lies + faces = (sides[1].face, sides[2].face) + # Save mpi_interfaces.node_indices dimension specific in containers_[23]d_parallel.jl + init_mpi_interface_node_indices!(mpi_interfaces, faces, local_side, + unsafe_load(info).orientation, + mpi_interface_id) + + return nothing +end # Initialization of MPI mortars after the function barrier function init_mpi_mortars_iter_face_inner(info, sides, user_data) - @unpack mpi_mortars, mpi_mortar_id, mesh = user_data - user_data.mpi_mortar_id += 1 - - # Get Tuple of adjacent trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - # Quadrant numbering offsets of the quadrants at this mortar - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) - - if sides[1].is_hanging == true - hanging_side = 1 - full_side = 2 - else # sides[2].is_hanging == true - hanging_side = 2 - full_side = 1 - end - # Just be sure before accessing is.full or is.hanging later - @assert sides[full_side].is_hanging == false - @assert sides[hanging_side].is_hanging == true - - # Find small quads that are locally available - local_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== false) - - # Get id of local small quadrants within their tree - # Indexing CBinding.Caccessor via a Vector does not work here -> use map instead - tree_small_quad_ids = 
map(p->sides[hanging_side].is.hanging.quadid[p], local_small_quad_positions) - local_small_quad_ids = offsets[hanging_side] .+ tree_small_quad_ids # ids cumulative over local trees - - # Determine if large quadrant is available and if yes, determine its id - if sides[full_side].is.full.is_ghost == false - local_large_quad_id = offsets[full_side] + sides[full_side].is.full.quadid - else - local_large_quad_id = -1 # large quad is ghost - end - - # Write data to mortar container, convert to 1-based indexing - # Start with small elements - local_neighbor_ids = local_small_quad_ids .+ 1 - local_neighbor_positions = local_small_quad_positions - # Add large element information if it is locally available - if local_large_quad_id > -1 - push!(local_neighbor_ids, local_large_quad_id + 1) # convert to 1-based index - push!(local_neighbor_positions, 2^(ndims(mesh)-1) + 1) - end - - mpi_mortars.local_neighbor_ids[mpi_mortar_id] = local_neighbor_ids - mpi_mortars.local_neighbor_positions[mpi_mortar_id] = local_neighbor_positions - - # init_mortar_node_indices! expects side 1 to contain small elements - faces = (sides[hanging_side].face, sides[full_side].face) - init_mortar_node_indices!(mpi_mortars, faces, unsafe_load(info).orientation, mpi_mortar_id) - - return nothing -end + @unpack mpi_mortars, mpi_mortar_id, mesh = user_data + user_data.mpi_mortar_id += 1 + + # Get Tuple of adjacent trees, one-based indexing + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + # Quadrant numbering offsets of the quadrants at this mortar + offsets = SVector(trees[1].quadrants_offset, + trees[2].quadrants_offset) + + if sides[1].is_hanging == true + hanging_side = 1 + full_side = 2 + else # sides[2].is_hanging == true + hanging_side = 2 + full_side = 1 + end + # Just be sure before accessing is.full or is.hanging later + @assert sides[full_side].is_hanging == false + @assert sides[hanging_side].is_hanging == true + + # Find small quads that are locally available + local_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== + false) + + # Get id of local small quadrants within their tree + # Indexing CBinding.Caccessor via a Vector does not work here -> use map instead + tree_small_quad_ids = map(p -> sides[hanging_side].is.hanging.quadid[p], + local_small_quad_positions) + local_small_quad_ids = offsets[hanging_side] .+ tree_small_quad_ids # ids cumulative over local trees + + # Determine if large quadrant is available and if yes, determine its id + if sides[full_side].is.full.is_ghost == false + local_large_quad_id = offsets[full_side] + sides[full_side].is.full.quadid + else + local_large_quad_id = -1 # large quad is ghost + end + + # Write data to mortar container, convert to 1-based indexing + # Start with small elements + local_neighbor_ids = local_small_quad_ids .+ 1 + local_neighbor_positions = local_small_quad_positions + # Add large element information if it is locally available + if local_large_quad_id > -1 + push!(local_neighbor_ids, local_large_quad_id + 1) # convert to 1-based index + push!(local_neighbor_positions, 2^(ndims(mesh) - 1) + 1) + end + mpi_mortars.local_neighbor_ids[mpi_mortar_id] = local_neighbor_ids + mpi_mortars.local_neighbor_positions[mpi_mortar_id] = local_neighbor_positions + + # init_mortar_node_indices! 
expects side 1 to contain small elements + faces = (sides[hanging_side].face, sides[full_side].face) + init_mortar_node_indices!(mpi_mortars, faces, unsafe_load(info).orientation, + mpi_mortar_id) + + return nothing +end # Iterate over all interfaces and count # - (inner) interfaces @@ -444,90 +485,97 @@ end # - (MPI) mortars at subdomain boundaries # and collect the numbers in `user_data` in this order. function count_surfaces_iter_face_parallel(info, user_data) - if unsafe_load(info).sides.elem_count == 2 - # Two neighboring elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface or MPI interface - if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface - # Unpack user_data = [mpi_interface_count] and increment mpi_interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 4) - unsafe_store!(ptr, id + 1, 4) - else - # Unpack user_data = [interface_count] and increment interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 1) - unsafe_store!(ptr, id + 1, 1) - end - else - # Hanging nodes => mortar or MPI mortar - # First, we check which side is hanging, i.e., on which side we have the refined cells. - # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they - # belong to another rank. That way we can determine if this is a regular mortar or MPI mortar - if sides[1].is_hanging == true - @assert sides[2].is_hanging == false - if any(sides[1].is.hanging.is_ghost .== true) || sides[2].is.full.is_ghost == true - face_has_ghost_side = true - else - face_has_ghost_side = false - end - else # sides[2].is_hanging == true - @assert sides[1].is_hanging == false - if sides[1].is.full.is_ghost == true || any(sides[2].is.hanging.is_ghost .== true) - face_has_ghost_side = true + if unsafe_load(info).sides.elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface or MPI interface + if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface + # Unpack user_data = [mpi_interface_count] and increment mpi_interface_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 4) + unsafe_store!(ptr, id + 1, 4) + else + # Unpack user_data = [interface_count] and increment interface_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 1) + unsafe_store!(ptr, id + 1, 1) + end else - face_has_ghost_side = false + # Hanging nodes => mortar or MPI mortar + # First, we check which side is hanging, i.e., on which side we have the refined cells. + # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they + # belong to another rank. 
That way we can determine if this is a regular mortar or MPI mortar
+ if sides[1].is_hanging == true
+ @assert sides[2].is_hanging == false
+ if any(sides[1].is.hanging.is_ghost .== true) ||
+ sides[2].is.full.is_ghost == true
+ face_has_ghost_side = true
+ else
+ face_has_ghost_side = false
+ end
+ else # sides[2].is_hanging == true
+ @assert sides[1].is_hanging == false
+ if sides[1].is.full.is_ghost == true ||
+ any(sides[2].is.hanging.is_ghost .== true)
+ face_has_ghost_side = true
+ else
+ face_has_ghost_side = false
+ end
+ end
+ if face_has_ghost_side
+ # Unpack user_data = [mpi_mortar_count] and increment mpi_mortar_count
+ ptr = Ptr{Int}(user_data)
+ id = unsafe_load(ptr, 5)
+ unsafe_store!(ptr, id + 1, 5)
+ else
+ # Unpack user_data = [mortar_count] and increment mortar_count
+ ptr = Ptr{Int}(user_data)
+ id = unsafe_load(ptr, 2)
+ unsafe_store!(ptr, id + 1, 2)
+ end
end
- end
- if face_has_ghost_side
- # Unpack user_data = [mpi_mortar_count] and increment mpi_mortar_count
- ptr = Ptr{Int}(user_data)
- id = unsafe_load(ptr, 5)
- unsafe_store!(ptr, id + 1, 5)
- else
- # Unpack user_data = [mortar_count] and increment mortar_count
+ elseif unsafe_load(info).sides.elem_count == 1
+ # One neighboring element => boundary
+
+ # Unpack user_data = [boundary_count] and increment boundary_count
ptr = Ptr{Int}(user_data)
- id = unsafe_load(ptr, 2)
- unsafe_store!(ptr, id + 1, 2)
- end
end
- elseif unsafe_load(info).sides.elem_count == 1
- # One neighboring elements => boundary
- # Unpack user_data = [boundary_count] and increment boundary_count
ptr = Ptr{Int}(user_data)
- id = unsafe_load(ptr, 3)
- unsafe_store!(ptr, id + 1, 3)
end
-
- return nothing
+ return nothing
end
# 2D
-cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{2}) = @cfunction(count_surfaces_iter_face_parallel, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid}))
+function cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{2})
+ @cfunction(count_surfaces_iter_face_parallel, Cvoid,
+ (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid}))
+end
# 3D
-cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{3}) = @cfunction(count_surfaces_iter_face_parallel, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid}))
+function cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{3})
+ @cfunction(count_surfaces_iter_face_parallel, Cvoid,
+ (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid}))
+end
function count_required_surfaces(mesh::ParallelP4estMesh)
- # Let p4est iterate over all interfaces and call count_surfaces_iter_face_parallel
- iter_face_c = cfunction(count_surfaces_iter_face_parallel, Val(ndims(mesh)))
+ # Let p4est iterate over all interfaces and call count_surfaces_iter_face_parallel
+ iter_face_c = cfunction(count_surfaces_iter_face_parallel, Val(ndims(mesh)))
- # interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars
- user_data = [0, 0, 0, 0, 0]
+ # interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars
+ user_data = [0, 0, 0, 0, 0]
- iterate_p4est(mesh.p4est, user_data; ghost_layer=mesh.ghost, iter_face_c=iter_face_c)
+ iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost,
+ iter_face_c = iter_face_c)
- # Return counters
- return (interfaces = user_data[1],
- mortars = user_data[2],
- boundaries = user_data[3],
- mpi_interfaces = user_data[4],
- mpi_mortars = user_data[5])
+ # Return counters
+ return (interfaces = user_data[1],
+ mortars = user_data[2],
+ boundaries = user_data[3],
+ mpi_interfaces = user_data[4],
+ mpi_mortars
= user_data[5]) end - - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_parallel_2d.jl b/src/solvers/dgsem_p4est/containers_parallel_2d.jl index 8510b4a50c1..8c39e4a69c8 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_2d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_2d.jl @@ -3,77 +3,81 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize node_indices of MPI interface container -@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{2}, +@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{ + 2 + }, faces, local_side, orientation, mpi_interface_id) - # Align interface in positive coordinate direction of primary element. - # For orientation == 1, the secondary element needs to be indexed backwards - # relative to the interface. - if local_side == 1 || orientation == 0 - # Forward indexing - i = :i_forward - else - # Backward indexing - i = :i_backward - end + # Align interface in positive coordinate direction of primary element. + # For orientation == 1, the secondary element needs to be indexed backwards + # relative to the interface. + if local_side == 1 || orientation == 0 + # Forward indexing + i = :i_forward + else + # Backward indexing + i = :i_backward + end - if faces[local_side] == 0 - # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, i) - elseif faces[local_side] == 1 - # Index face in positive x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:end, i) - elseif faces[local_side] == 2 - # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :begin) - else # faces[local_side] == 3 - # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :end) - end + if faces[local_side] == 0 + # Index face in negative x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:begin, i) + elseif faces[local_side] == 1 + # Index face in positive x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:end, i) + elseif faces[local_side] == 2 + # Index face in negative y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (i, :begin) + else # faces[local_side] == 3 + # Index face in positive y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (i, :end) + end - return mpi_interfaces + return mpi_interfaces end - # Normal directions of small element surfaces are needed to calculate the mortar fluxes. Initialize # them for locally available small elements. 
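
For reference, the surface-counting callback above communicates with Julia through a raw pointer into the five-element counter vector passed as `user_data`. A self-contained sketch of that load-increment-store protocol (here `counters` stands in for the vector handed to `iterate_p4est`):

    counters = [0, 0, 0, 0, 0]  # interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars
    GC.@preserve counters begin
        ptr = pointer(counters)        # a Ptr{Int}, like `user_data` above
        id = unsafe_load(ptr, 3)       # read the boundary counter (slot 3)
        unsafe_store!(ptr, id + 1, 3)  # increment it in place
    end
    @assert counters == [0, 0, 1, 0, 0]
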
-function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{2}, basis, elements) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars - @unpack contravariant_vectors = elements - index_range = eachnode(basis) +function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{2}, basis, + elements) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars + @unpack contravariant_vectors = elements + index_range = eachnode(basis) - @threaded for mortar in 1:nmpimortars(mpi_mortars) - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) + @threaded for mortar in 1:nmpimortars(mpi_mortars) + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - # ignore large elements - if position == 3 - continue - end + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + # ignore large elements + if position == 3 + continue + end - i_small = i_small_start - j_small = j_small_start - for node in eachnode(basis) - # Get the normal direction on the small element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, element) - @views mpi_mortars.normal_directions[:, node, position, mortar] .= normal_direction + i_small = i_small_start + j_small = j_small_start + for node in eachnode(basis) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, element) + @views mpi_mortars.normal_directions[:, node, position, mortar] .= normal_direction - i_small += i_small_step - j_small += j_small_step - end + i_small += i_small_step + j_small += j_small_step + end + end end - end end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/containers_parallel_3d.jl b/src/solvers/dgsem_p4est/containers_parallel_3d.jl index 3a9fe90a8fb..be4e2bfbfc9 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_3d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_3d.jl @@ -3,128 +3,149 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize node_indices of MPI interface container -@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{3}, +@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{ + 3 + }, faces, local_side, orientation, mpi_interface_id) - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). - # The secondary element needs to be indexed differently. 
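
As a reminder of the p4est face numbering behind these index tuples: faces 0/1 lie on the x-axis, 2/3 on the y-axis, 4/5 on the z-axis, and even-numbered faces point in negative coordinate direction. A hedged two-liner encoding that convention:

    axis(face) = (face >> 1) + 1     # 0,1 -> 1 (x); 2,3 -> 2 (y); 4,5 -> 3 (z)
    is_positive(face) = isodd(face)  # faces 1, 3, 5 point in +x, +y, +z
    axis(4), is_positive(4)          # => (3, false), i.e., face 4 is the -z face
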
- if local_side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else # local_side == 2 - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) - end + # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # The secondary element needs to be indexed differently. + if local_side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else # local_side == 2 + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end - if faces[local_side] == 0 - # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, surface_index1, surface_index2) - elseif faces[local_side] == 1 - # Index face in positive x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:end, surface_index1, surface_index2) - elseif faces[local_side] == 2 - # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :begin, surface_index2) - elseif faces[local_side] == 3 - # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :end, surface_index2) - elseif faces[local_side] == 4 - # Index face in negative z-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, :begin) - else # faces[local_side] == 5 - # Index face in positive z-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, :end) - end + if faces[local_side] == 0 + # Index face in negative x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:begin, surface_index1, + surface_index2) + elseif faces[local_side] == 1 + # Index face in positive x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:end, surface_index1, + surface_index2) + elseif faces[local_side] == 2 + # Index face in negative y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :begin, + surface_index2) + elseif faces[local_side] == 3 + # Index face in positive y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :end, + surface_index2) + elseif faces[local_side] == 4 + # Index face in negative z-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, + :begin) + else # faces[local_side] == 5 + # Index face in positive z-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, + :end) + end - return mpi_interfaces + return mpi_interfaces end - # Initialize node_indices of MPI mortar container. Works the same as for its serial counterpart. # faces[1] is expected to be the face of the small side. @inline function init_mortar_node_indices!(mortars::P4estMPIMortarContainer{3}, faces, orientation, mortar_id) - for side in 1:2 - # Align mortar at small side. - # The large side needs to be indexed differently. - if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) - end + for side in 1:2 + # Align mortar at small side. + # The large side needs to be indexed differently. 
+ if side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end - if faces[side] == 0 - # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, surface_index2) - elseif faces[side] == 1 - # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, surface_index1, surface_index2) - elseif faces[side] == 2 - # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, surface_index2) - elseif faces[side] == 3 - # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, surface_index2) - elseif faces[side] == 4 - # Index face in negative z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :begin) - else # faces[side] == 5 - # Index face in positive z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :end) + if faces[side] == 0 + # Index face in negative x-direction + mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + surface_index2) + elseif faces[side] == 1 + # Index face in positive x-direction + mortars.node_indices[side, mortar_id] = (:end, surface_index1, + surface_index2) + elseif faces[side] == 2 + # Index face in negative y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + surface_index2) + elseif faces[side] == 3 + # Index face in positive y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :end, + surface_index2) + elseif faces[side] == 4 + # Index face in negative z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :begin) + else # faces[side] == 5 + # Index face in positive z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :end) + end end - end - return mortars + return mortars end - # Normal directions of small element surfaces are needed to calculate the mortar fluxes. Initialize # them for locally available small elements. -function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{3}, basis, elements) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars - @unpack contravariant_vectors = elements - index_range = eachnode(basis) - - @threaded for mortar in 1:nmpimortars(mpi_mortars) - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - # ignore large elements - if position == 5 - continue - end - - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - for j in eachnode(basis) - for i in eachnode(basis) - # Get the normal direction on the small element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
- normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, k_small, element) - @views mpi_mortars.normal_directions[:, i, j, position, mortar] .= normal_direction - - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i +function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{3}, basis, + elements) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars + @unpack contravariant_vectors = elements + index_range = eachnode(basis) + + @threaded for mortar in 1:nmpimortars(mpi_mortars) + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + # ignore large elements + if position == 5 + continue + end + + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + for j in eachnode(basis) + for i in eachnode(basis) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, k_small, + element) + @views mpi_mortars.normal_directions[:, i, j, position, mortar] .= normal_direction + + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end end - end end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl index 22f847dbf3e..a7cc1eee04d 100644 --- a/src/solvers/dgsem_p4est/dg.jl +++ b/src/solvers/dgsem_p4est/dg.jl @@ -3,52 +3,52 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. 
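
The `create_cache` below assembles its result by splatting NamedTuples together, so the specialized `create_cache` methods can extend the basic cache without mutating it. A tiny sketch of that merging pattern (field values are hypothetical placeholders):

    cache = (; elements = "elements", interfaces = "interfaces")
    extra = (; u_threaded = [zeros(2)])  # e.g., mortar-specific scratch space
    cache = (; cache..., extra...)       # merged into one NamedTuple
    keys(cache)                          # => (:elements, :interfaces, :u_threaded)
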
-function create_cache(mesh::P4estMesh, equations::AbstractEquations, dg::DG, ::Any, ::Type{uEltype}) where {uEltype<:Real} - # Make sure to balance the `p4est` before creating any containers - # in case someone has tampered with the `p4est` after creating the mesh - balance!(mesh) +function create_cache(mesh::P4estMesh, equations::AbstractEquations, dg::DG, ::Any, + ::Type{uEltype}) where {uEltype <: Real} + # Make sure to balance the `p4est` before creating any containers + # in case someone has tampered with the `p4est` after creating the mesh + balance!(mesh) - elements = init_elements(mesh, equations, dg.basis, uEltype) - interfaces = init_interfaces(mesh, equations, dg.basis, elements) - boundaries = init_boundaries(mesh, equations, dg.basis, elements) - mortars = init_mortars(mesh, equations, dg.basis, elements) + elements = init_elements(mesh, equations, dg.basis, uEltype) + interfaces = init_interfaces(mesh, equations, dg.basis, elements) + boundaries = init_boundaries(mesh, equations, dg.basis, elements) + mortars = init_mortars(mesh, equations, dg.basis, elements) - cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, mortars) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end - # Extract outward-pointing normal direction # (contravariant vector ±Ja^i, i = index) # Note that this vector is not normalized @inline function get_normal_direction(direction, contravariant_vectors, indices...) - - orientation = (direction + 1) >> 1 - normal = get_contravariant_vector(orientation, contravariant_vectors, indices...) - - # Contravariant vectors at interfaces in negative coordinate direction are pointing inwards - if isodd(direction) - return -normal - else - return normal - end + orientation = (direction + 1) >> 1 + normal = get_contravariant_vector(orientation, contravariant_vectors, indices...) + + # Contravariant vectors at interfaces in negative coordinate direction are pointing inwards + if isodd(direction) + return -normal + else + return normal + end end - include("containers.jl") + include("dg_2d.jl") +include("dg_2d_parabolic.jl") + include("dg_3d.jl") include("dg_parallel.jl") - - end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index a6d3d6abaeb..bc7d9edb6ef 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -3,23 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. 
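
A short illustration of the `(direction + 1) >> 1` mapping in `get_normal_direction` above: directions are paired per coordinate axis, and the odd (negative-coordinate) directions are the ones whose contravariant vectors get flipped:

    for direction in 1:4
        orientation = (direction + 1) >> 1  # 1,2 -> 1 (x-axis); 3,4 -> 2 (y-axis)
        flip = isodd(direction) ? -1 : +1   # odd = negative face, vector points inwards
        println((direction, orientation, flip))
    end
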
-function create_cache(mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal performance using different types - MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, - uEltype, 2, - nvariables(equations) * nnodes(mortar_l2)} - fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - u_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - - (; fstar_upper_threaded, fstar_lower_threaded, u_threaded) +function create_cache(mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, + uEltype) + # TODO: Taal performance using different types + MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, + uEltype, 2, + nvariables(equations) * nnodes(mortar_l2)} + fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + u_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + + (; fstar_upper_threaded, fstar_lower_threaded, u_threaded) end - # index_to_start_step_2d(index::Symbol, index_range) # # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`), @@ -42,154 +42,166 @@ end # j_volume += j_volume_step # end @inline function index_to_start_step_2d(index::Symbol, index_range) - index_begin = first(index_range) - index_end = last(index_range) - - if index === :begin - return index_begin, 0 - elseif index === :end - return index_end, 0 - elseif index === :i_forward - return index_begin, 1 - else # if index === :i_backward - return index_end, -1 - end + index_begin = first(index_range) + index_end = last(index_range) + + if index === :begin + return index_begin, 0 + elseif index === :end + return index_end, 0 + elseif index === :i_forward + return index_begin, 1 + else # if index === :i_backward + return index_end, -1 + end end # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::P4estMesh{2}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - index_range = eachnode(dg) - - @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, primary_element] - end - i_primary += i_primary_step - j_primary += j_primary_step - end + @unpack interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachinterface(dg, cache) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. 
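
The doc example for `index_to_start_step_2d` above can be made runnable; for `:i_backward` on four nodes, the returned (start, step) pair reproduces the reversed traversal:

    index_range = 1:4
    start, step = last(index_range), -1  # what index_to_start_step_2d(:i_backward, 1:4) yields
    [start + (n - 1) * step for n in index_range]  # == [4, 3, 2, 1]
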
+ primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, + primary_element] + end + i_primary += i_primary_step + j_primary += j_primary_step + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], index_range) - j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], index_range) - - i_secondary = i_secondary_start - j_secondary = j_secondary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, secondary_element] - end - i_secondary += i_secondary_step - j_secondary += j_secondary_step + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + secondary_element = interfaces.neighbor_ids[2, interface] + secondary_indices = interfaces.node_indices[2, interface] + + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, + secondary_element] + end + i_secondary += i_secondary_step + j_secondary += j_secondary_step + end end - end - return nothing + return nothing end - function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - index_end = last(index_range) - - @threaded for interface in eachinterface(dg, cache) - # Get element and side index information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - # Create the local i,j indexing on the primary element used to pull normal direction information - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - - # Get element and side index information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - - # Initiate the secondary index to be used in the surface for loop. - # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. 
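
Concretely, for a flipped interface with four surface nodes, the forward primary counter and backward secondary counter described above pair the nodes as follows:

    index_end = 4
    pairs = [(node, index_end - node + 1) for node in 1:index_end]
    # == [(1, 4), (2, 3), (3, 2), (4, 1)]: primary runs forward, secondary backward
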
- if :i_backward in secondary_indices - node_secondary = index_end - node_secondary_step = -1 - else - node_secondary = 1 - node_secondary_step = 1 - end + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachinterface(dg, cache) + # Get element and side index information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + # Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + + # Get element and side index information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if :i_backward in secondary_indices + node_secondary = index_end + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end - for node in eachnode(dg) - # Get the normal direction on the primary element. - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, contravariant_vectors, - i_primary, j_primary, primary_element) - - calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - node, primary_direction, primary_element, - node_secondary, secondary_direction, secondary_element) - - # Increment primary element indices to pull the normal direction - i_primary += i_primary_step - j_primary += j_primary_step - # Increment the surface node index along the secondary element - node_secondary += node_secondary_step + for node in eachnode(dg) + # Get the normal direction on the primary element. + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. 
+ normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, + equations, + surface_integral, dg, cache, + interface, normal_direction, + node, primary_direction, primary_element, + node_secondary, secondary_direction, secondary_element) + + # Increment primary element indices to pull the normal direction + i_primary += i_primary_step + j_primary += j_primary_step + # Increment the surface node index along the secondary element + node_secondary += node_secondary_step + end end - end - return nothing + return nothing end - # Inlined version of the interface flux computation for conservation laws @inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, - primary_node_index, primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - @unpack surface_flux = surface_integral + primary_node_index, primary_direction_index, + primary_element_index, + secondary_node_index, secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, interface_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + interface_index) - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - for v in eachvariable(equations) - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v] - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -flux_[v] - end + for v in eachvariable(equations) + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v] + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -flux_[v] + end end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms @@ -198,129 +210,135 @@ end nonconservative_terms::True, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, - primary_node_index, primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - surface_flux, nonconservative_flux = surface_integral.surface_flux - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, interface_index) - - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. 
- noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - # Store the flux with nonconservative terms on the primary and secondary elements - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = ( - flux_[v] + 0.5 * noncons_primary[v]) - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -( - flux_[v] + 0.5 * noncons_secondary[v]) - end + primary_node_index, primary_direction_index, + primary_element_index, + secondary_node_index, secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + surface_flux, nonconservative_flux = surface_integral.surface_flux + + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + interface_index) + + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. + noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + # Store the flux with nonconservative terms on the primary and secondary elements + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = (flux_[v] + + 0.5 * + noncons_primary[v]) + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -(flux_[v] + + 0.5 * + noncons_secondary[v]) + end end - function prolong2boundaries!(cache, u, mesh::P4estMesh{2}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - index_range = eachnode(dg) - - @threaded for boundary in eachboundary(dg, cache) - # Copy solution data from the element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - - i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) - j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) - - i_node = i_node_start - j_node = j_node_start - for i in eachnode(dg) - for v in eachvariable(equations) - boundaries.u[v, i, boundary] = u[v, i_node, j_node, element] - end - i_node += i_node_step - j_node += j_node_step + @unpack boundaries = cache + index_range = eachnode(dg) + + @threaded for boundary in eachboundary(dg, cache) + # Copy solution data from the element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. 
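
The sign conventions in the interface-flux kernels above are what make the conservative part telescope across each interior face, while the nonconservative products in general leave a net contribution (hence the separate `noncons_primary`/`noncons_secondary`). A hedged toy check with hypothetical scalar fluxes:

    f(u_ll, u_rr) = 0.5 * (u_ll + u_rr)    # symmetric "conservative" flux
    g(u_ll, u_rr) = u_ll * (u_rr - u_ll)   # hypothetical nonconservative product
    u_ll, u_rr = 1.0, 3.0
    f(u_ll, u_rr) - f(u_rr, u_ll)          # == 0.0: the +/- storage cancels exactly
    0.5 * (g(u_ll, u_rr) - g(u_rr, u_ll))  # == 4.0: the noncons part need not cancel
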
+ element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + + i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) + j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) + + i_node = i_node_start + j_node = j_node_start + for i in eachnode(dg) + for v in eachvariable(equations) + boundaries.u[v, i, boundary] = u[v, i_node, j_node, element] + end + i_node += i_node_step + j_node += j_node_step + end end - end - return nothing + return nothing end - function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::P4estMesh{2}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack surface_flux_values = cache.elements - index_range = eachnode(dg) - - @threaded for local_index in eachindex(boundary_indexing) - # Use the local index to get the global boundary index from the pre-sorted list - boundary = boundary_indexing[local_index] - - # Get information on the adjacent element, compute the surface fluxes, - # and store them - element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - direction = indices2direction(node_indices) - - i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) - j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) - - i_node = i_node_start - j_node = j_node_start - for node in eachnode(dg) - calc_boundary_flux!(surface_flux_values, t, boundary_condition, - mesh, have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - i_node, j_node, - node, direction, element, boundary) - - i_node += i_node_step - j_node += j_node_step + @unpack boundaries = cache + @unpack surface_flux_values = cache.elements + index_range = eachnode(dg) + + @threaded for local_index in eachindex(boundary_indexing) + # Use the local index to get the global boundary index from the pre-sorted list + boundary = boundary_indexing[local_index] + + # Get information on the adjacent element, compute the surface fluxes, + # and store them + element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + direction = indices2direction(node_indices) + + i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) + j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) + + i_node = i_node_start + j_node = j_node_start + for node in eachnode(dg) + calc_boundary_flux!(surface_flux_values, t, boundary_condition, + mesh, have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + i_node, j_node, + node, direction, element, boundary) + + i_node += i_node_step + j_node += j_node_step + end end - end end - # inlined version of the boundary flux calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::P4estMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, i_index, j_index, - node_index, direction_index, element_index, boundary_index) - @unpack boundaries = cache - @unpack node_coordinates, contravariant_vectors = cache.elements - @unpack surface_flux = surface_integral + node_index, direction_index, element_index, + boundary_index) + @unpack boundaries = cache + @unpack node_coordinates, contravariant_vectors = cache.elements + @unpack surface_flux = surface_integral - # Extract solution data from boundary container - u_inner = get_node_vars(boundaries.u, equations, dg, 
node_index, boundary_index) + # Extract solution data from boundary container + u_inner = get_node_vars(boundaries.u, equations, dg, node_index, boundary_index) - # Outward-pointing normal direction (not normalized) - normal_direction = get_normal_direction(direction_index, contravariant_vectors, - i_index, j_index, element_index) + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction_index, contravariant_vectors, + i_index, j_index, element_index) - # Coordinates at boundary node - x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, element_index) + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, + element_index) - flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) + flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) - # Copy flux to element storage in the correct orientation - for v in eachvariable(equations) - surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] - end + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations) + surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] + end end # inlined version of the boundary flux with nonconservative terms calculation along a physical interface @@ -329,159 +347,168 @@ end nonconservative_terms::True, equations, surface_integral, dg::DG, cache, i_index, j_index, - node_index, direction_index, element_index, boundary_index) - @unpack boundaries = cache - @unpack node_coordinates, contravariant_vectors = cache.elements - surface_flux, nonconservative_flux = surface_integral.surface_flux - - # Extract solution data from boundary container - u_inner = get_node_vars(boundaries.u, equations, dg, node_index, boundary_index) - - # Outward-pointing normal direction (not normalized) - normal_direction = get_normal_direction(direction_index, contravariant_vectors, - i_index, j_index, element_index) - - # Coordinates at boundary node - x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, element_index) - - # Call pointwise numerical flux function for the conservative part - # in the normal direction on the boundary - flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) - - # Compute pointwise nonconservative numerical flux at the boundary. 
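
Any boundary condition used here must be callable as `(u_inner, normal_direction, x, t, surface_flux, equations)`. A hedged Dirichlet-style sketch, where `u_exact_sketch` is a hypothetical prescribed state and not a Trixi.jl API:

    # Hypothetical prescribed exterior state; a real setup would evaluate an exact solution
    u_exact_sketch(x, t, equations) = zero(x)

    function boundary_condition_dirichlet_sketch(u_inner, normal_direction, x, t,
                                                 surface_flux, equations)
        u_boundary = u_exact_sketch(x, t, equations)
        # Evaluate the numerical flux between the inner and the prescribed state
        return surface_flux(u_inner, u_boundary, normal_direction, equations)
    end
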
- # Note: This does not set any type of boundary condition for the nonconservative term - noncons_ = nonconservative_flux(u_inner, u_inner, normal_direction, normal_direction, equations) - - # Copy flux to element storage in the correct orientation - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] + 0.5 * noncons_[v] - end + node_index, direction_index, element_index, + boundary_index) + @unpack boundaries = cache + @unpack node_coordinates, contravariant_vectors = cache.elements + surface_flux, nonconservative_flux = surface_integral.surface_flux + + # Extract solution data from boundary container + u_inner = get_node_vars(boundaries.u, equations, dg, node_index, boundary_index) + + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction_index, contravariant_vectors, + i_index, j_index, element_index) + + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, + element_index) + + # Call pointwise numerical flux function for the conservative part + # in the normal direction on the boundary + flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) + + # Compute pointwise nonconservative numerical flux at the boundary. + # Note: This does not set any type of boundary condition for the nonconservative term + noncons_ = nonconservative_flux(u_inner, u_inner, normal_direction, + normal_direction, equations) + + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] + + 0.5 * + noncons_[v] + end end - function prolong2mortars!(cache, u, mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack neighbor_ids, node_indices = cache.mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Copy solution data from the small elements using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - small_indices = node_indices[1, mortar] - - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) - - for position in 1:2 - i_small = i_small_start - j_small = j_small_start - element = neighbor_ids[position, mortar] - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u[1, v, position, i, mortar] = u[v, i_small, j_small, element] + @unpack neighbor_ids, node_indices = cache.mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Copy solution data from the small elements using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. 
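
The interpolation step further down in this function applies precomputed mortar operators to the large-face buffer; per variable this is just a matrix-vector product. A hedged dense analogue with a hypothetical 2-node operator:

    forward_lower = [1.0 0.0; 0.5 0.5]       # hypothetical 2-node interpolation operator
    u_large = [1.0, 3.0]                     # one variable of the large-face buffer
    u_small_lower = forward_lower * u_large  # == [1.0, 2.0]
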
+ small_indices = node_indices[1, mortar] + + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + element = neighbor_ids[position, mortar] + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u[1, v, position, i, mortar] = u[v, i_small, j_small, + element] + end + i_small += i_small_step + j_small += j_small_step + end end - i_small += i_small_step - j_small += j_small_step - end - end - - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - - # Copy solution of large element face to buffer in the - # correct orientation - large_indices = node_indices[2, mortar] + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + + # Copy solution of large element face to buffer in the + # correct orientation + large_indices = node_indices[2, mortar] + + i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], + index_range) + j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], + index_range) + + i_large = i_large_start + j_large = j_large_start + element = neighbor_ids[3, mortar] + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i] = u[v, i_large, j_large, element] + end + i_large += i_large_step + j_large += j_large_step + end - i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], index_range) - j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], index_range) - - i_large = i_large_start - j_large = j_large_start - element = neighbor_ids[3, mortar] - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i] = u[v, i_large, j_large, element] - end - i_large += i_large_step - j_large += j_large_step + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, mortar), + mortar_l2.forward_lower, + u_buffer) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, mortar), + mortar_l2.forward_upper, + u_buffer) end - # Interpolate large element face data from buffer to small face locations - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, mortar), - mortar_l2.forward_lower, - u_buffer) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, mortar), - mortar_l2.forward_upper, - u_buffer) - end - - return nothing + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::P4estMesh{2}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = (fstar_lower_threaded[Threads.threadid()], - fstar_upper_threaded[Threads.threadid()]) - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) + @unpack neighbor_ids, node_indices = cache.mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_upper_threaded, fstar_lower_threaded = 
cache + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = (fstar_lower_threaded[Threads.threadid()], + fstar_upper_threaded[Threads.threadid()]) + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + element = neighbor_ids[position, mortar] + for node in eachnode(dg) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, element) + + calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + node) + + i_small += i_small_step + j_small += j_small_step + end + end - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - for position in 1:2 - i_small = i_small_start - j_small = j_small_start - element = neighbor_ids[position, mortar] - for node in eachnode(dg) - # Get the normal direction on the small element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
- normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, element) - - calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - node) - - i_small += i_small_step - j_small += j_small_step - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer) end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] - - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer) - end - - return nothing + return nothing end - # Inlined version of the mortar flux computation on small elements for conservation laws @inline function calc_mortar_flux!(fstar, mesh::P4estMesh{2}, @@ -489,15 +516,16 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) - @unpack u = cache.mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, + mortar_index) - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar[position_index], flux, equations, dg, node_index) + # Copy flux to buffer + set_node_vars!(fstar[position_index], flux, equations, dg, node_index) end # Inlined version of the mortar flux computation on small elements for equations with conservative and @@ -508,124 +536,129 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) - @unpack u = cache.mortars - surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u = cache.mortars + surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, + mortar_index) - # Compute conservative flux - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + # Compute conservative flux + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Compute nonconservative flux and add it to the conservative flux. - # The nonconservative flux is scaled by a factor of 0.5 based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) + # Compute nonconservative flux and add it to the conservative flux. 
+ # The nonconservative flux is scaled by a factor of 0.5 based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, + equations) - flux_plus_noncons = flux + 0.5 * noncons + flux_plus_noncons = flux + 0.5 * noncons - # Copy to buffer - set_node_vars!(fstar[position_index], flux_plus_noncons, equations, dg, node_index) + # Copy to buffer + set_node_vars!(fstar[position_index], flux_plus_noncons, equations, dg, node_index) end - @inline function mortar_fluxes_to_elements!(surface_flux_values, mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, dg::DGSEM, cache, mortar, fstar, u_buffer) - @unpack neighbor_ids, node_indices = cache.mortars - - # Copy solution small to small - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - for position in 1:2 - element = neighbor_ids[position, mortar] - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, small_direction, element] = fstar[position][v, i] - end - end - end - - # Project small fluxes to large element. - multiply_dimensionwise!(u_buffer, - mortar_l2.reverse_upper, fstar[2], - mortar_l2.reverse_lower, fstar[1]) - - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are twice as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 2 to obtain the flux of the large element. - u_buffer .*= -2 - - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. - # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. - large_element = neighbor_ids[3, mortar] - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - - if :i_backward in large_indices - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, i] - end + @unpack neighbor_ids, node_indices = cache.mortars + + # Copy solution small to small + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + for position in 1:2 + element = neighbor_ids[position, mortar] + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, small_direction, element] = fstar[position][v, + i] + end + end end - else - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, large_direction, large_element] = u_buffer[v, i] - end + + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, fstar[2], + mortar_l2.reverse_lower, fstar[1]) + + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are twice as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 2 to obtain the flux of the large element. 
+ u_buffer .*= -2 + + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. + large_element = neighbor_ids[3, mortar] + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + + if :i_backward in large_indices + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, + i] + end + end + else + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, large_direction, large_element] = u_buffer[v, + i] + end + end end - end - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::P4estMesh{2}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = ( - du[v, 1, l, element] + surface_flux_values[v, l, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, element] = ( - du[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, element] = ( - du[v, l, 1, element] + surface_flux_values[v, l, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), element] = ( - du[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
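+    # For example, the explicit assignment
+    #     du[v, 1, l, element] = du[v, 1, l, element] +
+    #                            surface_flux_values[v, l, 1, element] * factor_1
+    # can be fused by `@muladd` into a single `muladd` call, while the equivalent
+    # `+=` formulation would not be rewritten.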
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl new file mode 100644 index 00000000000..e73a8cda9b8 --- /dev/null +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -0,0 +1,475 @@ +# This method is called when a SemidiscretizationHyperbolicParabolic is constructed. +# It constructs the basic `cache` used throughout the simulation to compute +# the RHS etc. +function create_cache_parabolic(mesh::P4estMesh, equations_hyperbolic::AbstractEquations, + equations_parabolic::AbstractEquationsParabolic, + dg::DG, parabolic_scheme, RealT, uEltype) + balance!(mesh) + + elements = init_elements(mesh, equations_hyperbolic, dg.basis, uEltype) + interfaces = init_interfaces(mesh, equations_hyperbolic, dg.basis, elements) + boundaries = init_boundaries(mesh, equations_hyperbolic, dg.basis, elements) + + n_vars = nvariables(equations_hyperbolic) + n_elements = nelements(elements) + n_nodes = nnodes(dg.basis) # nodes in one direction + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + + return cache +end + +function calc_gradient!(gradients, u_transformed, t, + mesh::P4estMesh{2}, equations_parabolic, + boundary_conditions_parabolic, dg::DG, + cache, cache_parabolic) + gradients_x, gradients_y = gradients + + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + end + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + (; derivative_dhat) = dg.basis + (; contravariant_vectors) = cache.elements + + @threaded for element in eachelement(dg, cache) + + # Calculate gradients with respect to reference coordinates in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], u_node, + equations_parabolic, dg, ii, j, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, + equations_parabolic, dg, i, jj, element) + end + end + + # now that the reference coordinate gradients are computed, transform them node-by-node to physical gradients + # using the contravariant vectors + for j in eachnode(dg), i in eachnode(dg) + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + + 
gradients_reference_1 = get_node_vars(gradients_x, equations_parabolic, dg, + i, j, element) + gradients_reference_2 = get_node_vars(gradients_y, equations_parabolic, dg, + i, j, element) + + # note that the contravariant vectors are transposed compared with computations of flux + # divergences in `calc_volume_integral!`. See + # https://github.com/trixi-framework/Trixi.jl/pull/1490#discussion_r1213345190 + # for a more detailed discussion. + gradient_x_node = Ja11 * gradients_reference_1 + + Ja21 * gradients_reference_2 + gradient_y_node = Ja12 * gradients_reference_1 + + Ja22 * gradients_reference_2 + + set_node_vars!(gradients_x, gradient_x_node, equations_parabolic, dg, i, j, + element) + set_node_vars!(gradients_y, gradient_y_node, equations_parabolic, dg, i, j, + element) + end + end + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, u_transformed, mesh, + equations_parabolic, dg.surface_integral, dg) + end + + # Calculate interface fluxes for the gradient. This reuses P4est `calc_interface_flux!` along with a + # specialization for AbstractEquationsParabolic. + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + mesh, False(), # False() = no nonconservative terms + equations_parabolic, dg.surface_integral, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, u_transformed, mesh, + equations_parabolic, dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradients!(cache_parabolic, t, boundary_conditions_parabolic, + mesh, equations_parabolic, dg.surface_integral, dg) + end + + # TODO: parabolic; mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + (; boundary_interpolation) = dg.basis + (; surface_flux_values) = cache_parabolic.elements + (; contravariant_vectors) = cache.elements + + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
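+        # The interface values stored in `surface_flux_values` are scalar central
+        # averages, flux = 0.5 * (u_ll + u_rr), per variable (see the gradient
+        # specialization of `calc_interface_flux!` below); multiplying them by the
+        # components of the non-normalized outward normal turns them into surface
+        # contributions to the x- and y-components of the gradient separately.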
+        factor_1 = boundary_interpolation[1, 1]
+        factor_2 = boundary_interpolation[nnodes(dg), 2]
+        @threaded for element in eachelement(dg, cache)
+            for l in eachnode(dg)
+                for v in eachvariable(equations_parabolic)
+
+                    # Compute x-component of gradients
+
+                    # surface at -x
+                    normal_direction_x, _ = get_normal_direction(1, contravariant_vectors,
+                                                                 1, l, element)
+                    gradients_x[v, 1, l, element] = (gradients_x[v, 1, l, element] +
+                                                     surface_flux_values[v, l, 1, element] *
+                                                     factor_1 * normal_direction_x)
+
+                    # surface at +x
+                    normal_direction_x, _ = get_normal_direction(2, contravariant_vectors,
+                                                                 nnodes(dg), l, element)
+                    gradients_x[v, nnodes(dg), l, element] = (gradients_x[v, nnodes(dg), l,
+                                                                          element] +
+                                                              surface_flux_values[v, l, 2,
+                                                                                  element] *
+                                                              factor_2 * normal_direction_x)
+
+                    # surface at -y
+                    normal_direction_x, _ = get_normal_direction(3, contravariant_vectors,
+                                                                 l, 1, element)
+                    gradients_x[v, l, 1, element] = (gradients_x[v, l, 1, element] +
+                                                     surface_flux_values[v, l, 3, element] *
+                                                     factor_1 * normal_direction_x)
+
+                    # surface at +y
+                    normal_direction_x, _ = get_normal_direction(4, contravariant_vectors,
+                                                                 l, nnodes(dg), element)
+                    gradients_x[v, l, nnodes(dg), element] = (gradients_x[v, l, nnodes(dg),
+                                                                          element] +
+                                                              surface_flux_values[v, l, 4,
+                                                                                  element] *
+                                                              factor_2 * normal_direction_x)
+
+                    # Compute y-component of gradients
+
+                    # surface at -x
+                    _, normal_direction_y = get_normal_direction(1, contravariant_vectors,
+                                                                 1, l, element)
+                    gradients_y[v, 1, l, element] = (gradients_y[v, 1, l, element] +
+                                                     surface_flux_values[v, l, 1, element] *
+                                                     factor_1 * normal_direction_y)
+
+                    # surface at +x
+                    _, normal_direction_y = get_normal_direction(2, contravariant_vectors,
+                                                                 nnodes(dg), l, element)
+                    gradients_y[v, nnodes(dg), l, element] = (gradients_y[v, nnodes(dg), l,
+                                                                          element] +
+                                                              surface_flux_values[v, l, 2,
+                                                                                  element] *
+                                                              factor_2 * normal_direction_y)
+
+                    # surface at -y
+                    _, normal_direction_y = get_normal_direction(3, contravariant_vectors,
+                                                                 l, 1, element)
+                    gradients_y[v, l, 1, element] = (gradients_y[v, l, 1, element] +
+                                                     surface_flux_values[v, l, 3, element] *
+                                                     factor_1 * normal_direction_y)
+
+                    # surface at +y
+                    _, normal_direction_y = get_normal_direction(4, contravariant_vectors,
+                                                                 l, nnodes(dg), element)
+                    gradients_y[v, l, nnodes(dg), element] = (gradients_y[v, l, nnodes(dg),
+                                                                          element] +
+                                                              surface_flux_values[v, l, 4,
+                                                                                  element] *
+                                                              factor_2 * normal_direction_y)
+                end
+            end
+        end
+    end
+
+    # Apply Jacobian from mapping to reference element
+    @trixi_timeit timer() "Jacobian" begin
+        apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg,
+                                  cache_parabolic)
+        apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg,
+                                  cache_parabolic)
+    end
+
+    return nothing
+end
+
+# This version is used for parabolic gradient computations
+@inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2},
+                                      nonconservative_terms::False,
+                                      equations::AbstractEquationsParabolic,
+                                      surface_integral, dg::DG, cache,
+                                      interface_index, normal_direction,
+                                      primary_node_index, primary_direction_index,
+                                      primary_element_index,
+                                      secondary_node_index, secondary_direction_index,
+                                      secondary_element_index)
+    @unpack u = cache.interfaces
+    @unpack surface_flux = surface_integral
+
+    u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index,
+                                       interface_index)
+
+    flux_ = 0.5 * (u_ll + u_rr) # we assume that the gradient computations utilize a central flux
+
+    # Note that we don't flip the sign on the secondary flux.
This is because for parabolic terms, + # the normals are not embedded in `flux_` for the parabolic gradient computations. + for v in eachvariable(equations) + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v] + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = flux_[v] + end +end + +# This is the version used when calculating the divergence of the viscous fluxes +function calc_volume_integral!(du, flux_viscous, + mesh::P4estMesh{2}, + equations_parabolic::AbstractEquationsParabolic, + dg::DGSEM, cache) + (; derivative_dhat) = dg.basis + (; contravariant_vectors) = cache.elements + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + flux1 = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, element) + flux2 = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, element) + + # Compute the contravariant flux by taking the scalar product of the + # first contravariant vector Ja^1 and the flux vector + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) + contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[ii, i], contravariant_flux1, + equations_parabolic, dg, ii, j, element) + end + + # Compute the contravariant flux by taking the scalar product of the + # second contravariant vector Ja^2 and the flux vector + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[jj, j], contravariant_flux2, + equations_parabolic, dg, i, jj, element) + end + end + end + + return nothing +end + +# This is the version used when calculating the divergence of the viscous fluxes +# We pass the `surface_integral` argument solely for dispatch +function prolong2interfaces!(cache_parabolic, flux_viscous, + mesh::P4estMesh{2}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG, cache) + (; interfaces) = cache_parabolic + (; contravariant_vectors) = cache_parabolic.elements + index_range = eachnode(dg) + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for interface in eachinterface(dg, cache) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. + primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in eachnode(dg) + + # this is the outward normal direction on the primary element + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, primary_element) + + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
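+                # That is, what is written below is the normal component
+                # dot(flux_viscous, normal_direction) of the viscous flux at each
+                # face node, not the conservative variables themselves.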
+                flux_viscous = SVector(flux_viscous_x[v, i_primary, j_primary,
+                                                      primary_element],
+                                       flux_viscous_y[v, i_primary, j_primary,
+                                                      primary_element])
+
+                interfaces.u[1, v, i, interface] = dot(flux_viscous, normal_direction)
+            end
+            i_primary += i_primary_step
+            j_primary += j_primary_step
+        end
+
+        # Copy solution data from the secondary element using "delayed indexing" with
+        # a start value and a step size to get the correct face and orientation.
+        secondary_element = interfaces.neighbor_ids[2, interface]
+        secondary_indices = interfaces.node_indices[2, interface]
+        secondary_direction = indices2direction(secondary_indices)
+
+        i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1],
+                                                                     index_range)
+        j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2],
+                                                                     index_range)
+
+        i_secondary = i_secondary_start
+        j_secondary = j_secondary_start
+        for i in eachnode(dg)
+            # This is the outward normal direction on the secondary element.
+            # Here, we assume that normal_direction on the secondary element is
+            # the negative of normal_direction on the primary element.
+            normal_direction = get_normal_direction(secondary_direction,
+                                                    contravariant_vectors,
+                                                    i_secondary, j_secondary,
+                                                    secondary_element)
+
+            for v in eachvariable(equations_parabolic)
+                # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*!
+                flux_viscous = SVector(flux_viscous_x[v, i_secondary, j_secondary,
+                                                      secondary_element],
+                                       flux_viscous_y[v, i_secondary, j_secondary,
+                                                      secondary_element])
+                # store the normal flux with respect to the primary normal direction
+                interfaces.u[2, v, i, interface] = -dot(flux_viscous, normal_direction)
+            end
+            i_secondary += i_secondary_step
+            j_secondary += j_secondary_step
+        end
+    end
+
+    return nothing
+end
+
+function calc_interface_flux!(surface_flux_values,
+                              mesh::P4estMesh{2}, equations_parabolic,
+                              dg::DG, cache_parabolic)
+    (; neighbor_ids, node_indices) = cache_parabolic.interfaces
+    (; contravariant_vectors) = cache_parabolic.elements
+    index_range = eachnode(dg)
+    index_end = last(index_range)
+
+    @threaded for interface in eachinterface(dg, cache_parabolic)
+        # Get element and side index information on the primary element
+        primary_element = neighbor_ids[1, interface]
+        primary_indices = node_indices[1, interface]
+        primary_direction_index = indices2direction(primary_indices)
+
+        # Create the local i,j indexing on the primary element used to pull normal direction information
+        i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1],
+                                                                 index_range)
+        j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2],
+                                                                 index_range)
+
+        i_primary = i_primary_start
+        j_primary = j_primary_start
+
+        # Get element and side index information on the secondary element
+        secondary_element = neighbor_ids[2, interface]
+        secondary_indices = node_indices[2, interface]
+        secondary_direction_index = indices2direction(secondary_indices)
+
+        # Initiate the secondary index to be used in the surface for loop.
+        # This index on the primary side will always run forward but
+        # the secondary index might need to run backwards for flipped sides.
+        if :i_backward in secondary_indices
+            node_secondary = index_end
+            node_secondary_step = -1
+        else
+            node_secondary = 1
+            node_secondary_step = 1
+        end
+
+        for node in eachnode(dg)
+            # We prolong the viscous flux dotted with the outward normal on the
+            # primary element. We assume a BR-1 type of flux.
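+            # For BR-1 this amounts to simple arithmetic averaging of the two
+            # one-sided normal fluxes that were stored during `prolong2interfaces!`,
+            # i.e. flux = 0.5 * (viscous_flux_normal_ll + viscous_flux_normal_rr).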
+            viscous_flux_normal_ll, viscous_flux_normal_rr = get_surface_node_vars(cache_parabolic.interfaces.u,
+                                                                                   equations_parabolic,
+                                                                                   dg, node,
+                                                                                   interface)
+
+            flux = 0.5 * (viscous_flux_normal_ll + viscous_flux_normal_rr)
+
+            for v in eachvariable(equations_parabolic)
+                surface_flux_values[v, node, primary_direction_index, primary_element] = flux[v]
+                surface_flux_values[v, node_secondary, secondary_direction_index, secondary_element] = -flux[v]
+            end
+
+            # Increment primary element indices to pull the normal direction
+            i_primary += i_primary_step
+            j_primary += j_primary_step
+            # Increment the surface node index along the secondary element
+            node_secondary += node_secondary_step
+        end
+    end
+
+    return nothing
+end
+
+# TODO: parabolic, finish implementing `calc_boundary_flux_gradients!` and `calc_boundary_flux_divergence!`
+function prolong2boundaries!(cache_parabolic, flux_viscous,
+                             mesh::P4estMesh{2},
+                             equations_parabolic::AbstractEquationsParabolic,
+                             surface_integral, dg::DG, cache)
+    (; boundaries) = cache_parabolic
+    (; contravariant_vectors) = cache_parabolic.elements
+    index_range = eachnode(dg)
+
+    flux_viscous_x, flux_viscous_y = flux_viscous
+
+    @threaded for boundary in eachboundary(dg, cache_parabolic)
+        # Copy solution data from the element using "delayed indexing" with
+        # a start value and a step size to get the correct face and orientation.
+        element = boundaries.neighbor_ids[boundary]
+        node_indices = boundaries.node_indices[boundary]
+        direction = indices2direction(node_indices)
+
+        i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range)
+        j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range)
+
+        i_node = i_node_start
+        j_node = j_node_start
+        for i in eachnode(dg)
+            # this is the outward normal direction on the boundary element
+            normal_direction = get_normal_direction(direction,
+                                                    contravariant_vectors,
+                                                    i_node, j_node, element)
+
+            for v in eachvariable(equations_parabolic)
+                flux_viscous = SVector(flux_viscous_x[v, i_node, j_node, element],
+                                       flux_viscous_y[v, i_node, j_node, element])
+
+                boundaries.u[v, i, boundary] = dot(flux_viscous, normal_direction)
+            end
+            i_node += i_node_step
+            j_node += j_node_step
+        end
+    end
+
+    return nothing
+end
diff --git a/src/solvers/dgsem_p4est/dg_2d_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parallel.jl
index fdfce1deaca..a8887351c46 100644
--- a/src/solvers/dgsem_p4est/dg_2d_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_2d_parallel.jl
@@ -3,98 +3,105 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent
 function prolong2mpiinterfaces!(cache, u, mesh::ParallelP4estMesh{2}, equations,
                                 surface_integral, dg::DG)
-    @unpack mpi_interfaces = cache
-    index_range = eachnode(dg)
-
-    @threaded for interface in eachmpiinterface(dg, cache)
-        # Copy solution data from the local element using "delayed indexing" with
-        # a start value and a step size to get the correct face and orientation.
-        # Note that in the current implementation, the interface will be
-        # "aligned at the primary element", i.e., the index of the primary side
-        # will always run forwards.
- local_side = mpi_interfaces.local_sides[interface] - local_element = mpi_interfaces.local_neighbor_ids[interface] - local_indices = mpi_interfaces.node_indices[interface] - - i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], index_range) - j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], index_range) - - i_element = i_element_start - j_element = j_element_start - for i in eachnode(dg) - for v in eachvariable(equations) - mpi_interfaces.u[local_side, v, i, interface] = u[v, i_element, j_element, local_element] - end - i_element += i_element_step - j_element += j_element_step + @unpack mpi_interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachmpiinterface(dg, cache) + # Copy solution data from the local element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. + local_side = mpi_interfaces.local_sides[interface] + local_element = mpi_interfaces.local_neighbor_ids[interface] + local_indices = mpi_interfaces.node_indices[interface] + + i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], + index_range) + j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], + index_range) + + i_element = i_element_start + j_element = j_element_start + for i in eachnode(dg) + for v in eachvariable(equations) + mpi_interfaces.u[local_side, v, i, interface] = u[v, i_element, + j_element, + local_element] + end + i_element += i_element_step + j_element += j_element_step + end end - end - return nothing + return nothing end - function calc_mpi_interface_flux!(surface_flux_values, mesh::ParallelP4estMesh{2}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - index_end = last(index_range) - - @threaded for interface in eachmpiinterface(dg, cache) - # Get element and side index information on the local element - local_element = local_neighbor_ids[interface] - local_indices = node_indices[interface] - local_direction = indices2direction(local_indices) - local_side = local_sides[interface] - - # Create the local i,j indexing on the local element used to pull normal direction information - i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], index_range) - j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], index_range) - - i_element = i_element_start - j_element = j_element_start - - # Initiate the node index to be used in the surface for loop, - # the surface flux storage must be indexed in alignment with the local element indexing - if :i_backward in local_indices - surface_node = index_end - surface_node_step = -1 - else - surface_node = 1 - surface_node_step = 1 - end + @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachmpiinterface(dg, cache) + # Get element and side index information on the local element + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + + 
# Create the local i,j indexing on the local element used to pull normal direction information + i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], + index_range) + j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], + index_range) + + i_element = i_element_start + j_element = j_element_start + + # Initiate the node index to be used in the surface for loop, + # the surface flux storage must be indexed in alignment with the local element indexing + if :i_backward in local_indices + surface_node = index_end + surface_node_step = -1 + else + surface_node = 1 + surface_node_step = 1 + end - for node in eachnode(dg) - # Get the normal direction on the local element - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(local_direction, contravariant_vectors, - i_element, j_element, local_element) - - calc_mpi_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - node, local_side, - surface_node, local_direction, local_element) - - # Increment local element indices to pull the normal direction - i_element += i_element_step - j_element += j_element_step - - # Increment the surface node index along the local element - surface_node += surface_node_step + for node in eachnode(dg) + # Get the normal direction on the local element + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_element, j_element, local_element) + + calc_mpi_interface_flux!(surface_flux_values, mesh, nonconservative_terms, + equations, + surface_integral, dg, cache, + interface, normal_direction, + node, local_side, + surface_node, local_direction, local_element) + + # Increment local element indices to pull the normal direction + i_element += i_element_step + j_element += j_element_step + + # Increment the surface node index along the local element + surface_node += surface_node_step + end end - end - return nothing + return nothing end # Inlined version of the interface flux computation for conservation laws @@ -104,138 +111,146 @@ end surface_integral, dg::DG, cache, interface_index, normal_direction, interface_node_index, local_side, - surface_node_index, local_direction_index, local_element_index) - @unpack u = cache.mpi_interfaces - @unpack surface_flux = surface_integral - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface_node_index, interface_index) - - if local_side == 1 - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - else # local_side == 2 - flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) - end + surface_node_index, local_direction_index, + local_element_index) + @unpack u = cache.mpi_interfaces + @unpack surface_flux = surface_integral + + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface_node_index, + interface_index) + + if local_side == 1 + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + else # local_side == 2 + flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) + end - for v in eachvariable(equations) - surface_flux_values[v, surface_node_index, local_direction_index, local_element_index] = flux_[v] - end + for v in eachvariable(equations) + surface_flux_values[v, surface_node_index, 
local_direction_index, local_element_index] = flux_[v] + end end - function prolong2mpimortars!(cache, u, mesh::ParallelP4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack node_indices = cache.mpi_mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - - # Get start value and step size for indices on both sides to get the correct face - # and orientation - small_indices = node_indices[1, mortar] - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) - - large_indices = node_indices[2, mortar] - i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], index_range) - j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], index_range) - - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position == 3 # -> large element - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - i_large = i_large_start - j_large = j_large_start - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i] = u[v, i_large, j_large, element] - end - - i_large += i_large_step - j_large += j_large_step + @unpack node_indices = cache.mpi_mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] + + # Get start value and step size for indices on both sides to get the correct face + # and orientation + small_indices = node_indices[1, mortar] + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + large_indices = node_indices[2, mortar] + i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], + index_range) + j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], + index_range) + + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position == 3 # -> large element + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + i_large = i_large_start + j_large = j_large_start + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i] = u[v, i_large, j_large, element] + end + + i_large += i_large_step + j_large += j_large_step + end + + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, mortar), + mortar_l2.forward_lower, + u_buffer) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, mortar), + mortar_l2.forward_upper, + u_buffer) + else # position in (1, 2) -> small element + # Copy solution data from the small elements + i_small = i_small_start + j_small = j_small_start + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mpi_mortars.u[1, v, position, i, mortar] = u[v, i_small, + j_small, + element] + end + i_small += i_small_step + j_small += j_small_step + end + end end - - # Interpolate large element face data from 
buffer to small face locations - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, mortar), - mortar_l2.forward_lower, - u_buffer) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, mortar), - mortar_l2.forward_upper, - u_buffer) - else # position in (1, 2) -> small element - # Copy solution data from the small elements - i_small = i_small_start - j_small = j_small_start - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mpi_mortars.u[1, v, position, i, mortar] = u[v, i_small, j_small, element] - end - i_small += i_small_step - j_small += j_small_step - end - end end - end - return nothing + return nothing end - function calc_mpi_mortar_flux!(surface_flux_values, mesh::ParallelP4estMesh{2}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = (fstar_lower_threaded[Threads.threadid()], - fstar_upper_threaded[Threads.threadid()]) - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) - - for position in 1:2 - i_small = i_small_start - j_small = j_small_start - for node in eachnode(dg) - # Get the normal direction on the small element. - normal_direction = get_normal_direction(cache.mpi_mortars, node, position, mortar) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = (fstar_lower_threaded[Threads.threadid()], + fstar_upper_threaded[Threads.threadid()]) + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + for node in eachnode(dg) + # Get the normal direction on the small element. 
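+                # Note that, unlike the serial mortar version above, the normal
+                # direction is read from data precomputed in `cache.mpi_mortars`
+                # rather than from `contravariant_vectors`, since the neighboring
+                # element may live on a different MPI rank.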
+ normal_direction = get_normal_direction(cache.mpi_mortars, node, + position, mortar) + + calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + node) + + i_small += i_small_step + j_small += j_small_step + end + end - calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - node) + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - i_small += i_small_step - j_small += j_small_step - end + mpi_mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer) end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] - - mpi_mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer) - end - - return nothing + return nothing end - # Inlined version of the mortar flux computation on small elements for conservation laws @inline function calc_mpi_mortar_flux!(fstar, mesh::ParallelP4estMesh{2}, @@ -243,72 +258,75 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) - @unpack u = cache.mpi_mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mpi_mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, + mortar_index) - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar[position_index], flux, equations, dg, node_index) + # Copy flux to buffer + set_node_vars!(fstar[position_index], flux, equations, dg, node_index) end - @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values, mesh::ParallelP4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, - dg::DGSEM, cache, mortar, fstar, u_buffer) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - if position == 3 # -> large element - # Project small fluxes to large element. - multiply_dimensionwise!(u_buffer, - mortar_l2.reverse_upper, fstar[2], - mortar_l2.reverse_lower, fstar[1]) - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are twice as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 2 to obtain the flux of the large element. - u_buffer .*= -2 - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. 
- # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. - if :i_backward in large_indices - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, end + 1 - i, large_direction, element] = u_buffer[v, i] - end - end - else - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, large_direction, element] = u_buffer[v, i] - end - end - end - else # position in (1, 2) -> small element - # Copy solution small to small - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, small_direction, element] = fstar[position][v, i] + dg::DGSEM, cache, mortar, fstar, + u_buffer) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + if position == 3 # -> large element + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, fstar[2], + mortar_l2.reverse_lower, fstar[1]) + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are twice as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 2 to obtain the flux of the large element. + u_buffer .*= -2 + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. + if :i_backward in large_indices + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, end + 1 - i, large_direction, element] = u_buffer[v, + i] + end + end + else + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, large_direction, element] = u_buffer[v, + i] + end + end + end + else # position in (1, 2) -> small element + # Copy solution small to small + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, small_direction, element] = fstar[position][v, + i] + end + end end - end end - end - return nothing + return nothing end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index d5e32ca64e4..dc69329474f 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -3,24 +3,27 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. 
-function create_cache(mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal compare performance of different types - fstar_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] - - fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - - (; fstar_threaded, fstar_tmp_threaded, u_threaded) +function create_cache(mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, + uEltype) + # TODO: Taal compare performance of different types + fstar_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2), 4) + for _ in 1:Threads.nthreads()] + + fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations), + nnodes(mortar_l2), nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + + (; fstar_threaded, fstar_tmp_threaded, u_threaded) end - # index_to_start_step_3d(index::Symbol, index_range) # # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`), @@ -51,179 +54,194 @@ end # k_volume += k_volume_step_j # end @inline function index_to_start_step_3d(index::Symbol, index_range) - index_begin = first(index_range) - index_end = last(index_range) - - if index === :begin - return index_begin, 0, 0 - elseif index === :end - return index_end, 0, 0 - elseif index === :i_forward - return index_begin, 1, index_begin - index_end - 1 - elseif index === :i_backward - return index_end, -1, index_end + 1 - index_begin - elseif index === :j_forward - return index_begin, 0, 1 - else # if index === :j_backward - return index_end, 0, -1 - end + index_begin = first(index_range) + index_end = last(index_range) + + if index === :begin + return index_begin, 0, 0 + elseif index === :end + return index_end, 0, 0 + elseif index === :i_forward + return index_begin, 1, index_begin - index_end - 1 + elseif index === :i_backward + return index_end, -1, index_end + 1 - index_begin + elseif index === :j_forward + return index_begin, 0, 1 + else # if index === :j_backward + return index_end, 0, -1 + end end # Extract the two varying indices from a symbolic index tuple. # For example, `surface_indices((:i_forward, :end, :j_forward)) == (:i_forward, :j_forward)`. @inline function surface_indices(indices::NTuple{3, Symbol}) - i1, i2, i3 = indices - index = i1 - (index === :begin || index === :end) && return (i2, i3) + i1, i2, i3 = indices + index = i1 + (index === :begin || index === :end) && return (i2, i3) - index = i2 - (index === :begin || index === :end) && return (i1, i3) + index = i2 + (index === :begin || index === :end) && return (i1, i3) - # i3 in (:begin, :end) - return (i1, i2) + # i3 in (:begin, :end) + return (i1, i2) end # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - index_range = eachnode(dg) - - @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
- # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the indices of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, i_primary, j_primary, k_primary, primary_element] + @unpack interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachinterface(dg, cache) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the indices of the primary side + # will always run forwards. + primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[1, v, i, j, interface] = u[v, i_primary, j_primary, + k_primary, primary_element] + end + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j end - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - end - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j - end - - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
- secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], index_range) - k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start - k_secondary = k_secondary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, j, interface] = u[v, i_secondary, j_secondary, k_secondary, secondary_element] + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + secondary_element = interfaces.neighbor_ids[2, interface] + secondary_indices = interfaces.node_indices[2, interface] + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + index_range) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[2, v, i, j, interface] = u[v, i_secondary, j_secondary, + k_secondary, + secondary_element] + end + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + k_secondary += k_secondary_step_i + end + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + k_secondary += k_secondary_step_j end - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - k_secondary += k_secondary_step_i - end - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j - k_secondary += k_secondary_step_j end - end - return nothing + return nothing end - function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - - @threaded for interface in eachinterface(dg, cache) - # Get element and side information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - - # Get element and side information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - secondary_surface_indices = surface_indices(secondary_indices) - - # Get the surface indexing on the secondary element. 
- # Note that the indices of the primary side will always run forward but - # the secondary indices might need to run backwards for flipped sides. - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start - - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction from the primary element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, contravariant_vectors, - i_primary, j_primary, k_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, primary_direction, primary_element, - i_secondary, j_secondary, secondary_direction, secondary_element) - - # Increment the primary element indices - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - # Increment the secondary element surface indices - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - end - # Increment the primary element indices - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j - # Increment the secondary element surface indices - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + @threaded for interface in eachinterface(dg, cache) + # Get element and side information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + # Get element and side information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + secondary_surface_indices = surface_indices(secondary_indices) + + # Get the surface indexing on the secondary element. + # Note that the indices of the primary side will always run forward but + # the secondary indices might need to run backwards for flipped sides. + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], + index_range) + i_secondary = i_secondary_start + j_secondary = j_secondary_start + + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction from the primary element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. 
This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, + equations, + surface_integral, dg, cache, + interface, normal_direction, + i, j, primary_direction, primary_element, + i_secondary, j_secondary, secondary_direction, + secondary_element) + + # Increment the primary element indices + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + # Increment the secondary element surface indices + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + end end - end - return nothing + return nothing end - # Inlined function for interface flux computation for conservative flux terms @inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3}, @@ -233,20 +251,22 @@ end primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - @unpack surface_flux = surface_integral + secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, primary_j_node_index, interface_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + primary_j_node_index, interface_index) - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - for v in eachvariable(equations) - surface_flux_values[v, primary_i_node_index, primary_j_node_index, - primary_direction_index, primary_element_index] = flux_[v] - surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index] = -flux_[v] - end + for v in eachvariable(equations) + surface_flux_values[v, primary_i_node_index, primary_j_node_index, + primary_direction_index, primary_element_index] = flux_[v] + surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, + secondary_direction_index, secondary_element_index] = -flux_[v] + end end # Inlined function for interface flux computation for flux + nonconservative terms @@ -258,289 +278,314 @@ end primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - surface_flux, nonconservative_flux = surface_integral.surface_flux - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, primary_j_node_index, interface_index) - - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. 
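The factor 0.5 on the nonconservative terms comes from coupling the two elements with central fluxes/SATs: each side adds half of its own nonconservative product on top of the shared conservative flux. A scalar toy sketch, using a hypothetical nonconservative product of the form u * jump(u):

    # Conservative central flux and a toy two-point nonconservative "flux".
    central_flux(u_ll, u_rr, n) = 0.5 * (u_ll + u_rr) * n
    noncons_flux(u_me, u_other, n, n_avg) = u_me * (u_other - u_me) * n_avg

    u_ll, u_rr, n = 1.0, 3.0, 1.0
    flux = central_flux(u_ll, u_rr, n)
    # Mirrors the storage in the code above: same conservative part,
    # each side with half of its own nonconservative contribution.
    primary = flux + 0.5 * noncons_flux(u_ll, u_rr, n, n)
    secondary = -(flux + 0.5 * noncons_flux(u_rr, u_ll, n, n))
    primary + secondary  # nonzero in general: only the conservative part cancels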
- noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - # Store the flux with nonconservative terms on the primary and secondary elements - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, primary_i_node_index, primary_j_node_index, - primary_direction_index, primary_element_index] = flux_[v] + 0.5 * noncons_primary[v] - surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index] = -(flux_[v] + 0.5 * noncons_secondary[v]) - end + secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + surface_flux, nonconservative_flux = surface_integral.surface_flux + + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + primary_j_node_index, interface_index) + + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. + noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + # Store the flux with nonconservative terms on the primary and secondary elements + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, primary_i_node_index, primary_j_node_index, + primary_direction_index, primary_element_index] = flux_[v] + + 0.5 * noncons_primary[v] + surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, + secondary_direction_index, secondary_element_index] = -(flux_[v] + + 0.5 * + noncons_secondary[v]) + end end - function prolong2boundaries!(cache, u, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - index_range = eachnode(dg) - - @threaded for boundary in eachboundary(dg, cache) - # Copy solution data from the element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
- element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - - i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], index_range) - j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], index_range) - k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], index_range) - - i_node = i_node_start - j_node = j_node_start - k_node = k_node_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - boundaries.u[v, i, j, boundary] = u[v, i_node, j_node, k_node, element] + @unpack boundaries = cache + index_range = eachnode(dg) + + @threaded for boundary in eachboundary(dg, cache) + # Copy solution data from the element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + + i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], + index_range) + j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], + index_range) + k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], + index_range) + + i_node = i_node_start + j_node = j_node_start + k_node = k_node_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + boundaries.u[v, i, j, boundary] = u[v, i_node, j_node, k_node, + element] + end + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j end - i_node += i_node_step_i - j_node += j_node_step_i - k_node += k_node_step_i - end - i_node += i_node_step_j - j_node += j_node_step_j - k_node += k_node_step_j end - end - return nothing + return nothing end - function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack surface_flux_values, node_coordinates, contravariant_vectors = cache.elements - @unpack surface_flux = surface_integral - index_range = eachnode(dg) - - @threaded for local_index in eachindex(boundary_indexing) - # Use the local index to get the global boundary index from the - # pre-sorted list - boundary = boundary_indexing[local_index] - - # Get information on the adjacent element, compute the surface fluxes, - # and store them - element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - direction = indices2direction(node_indices) - - i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], index_range) - j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], index_range) - k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], index_range) - - i_node = i_node_start - j_node = j_node_start - k_node = k_node_start - for j in eachnode(dg) - for i in eachnode(dg) - # Extract solution data from boundary container - u_inner = get_node_vars(boundaries.u, equations, dg, i, j, boundary) - - # Outward-pointing normal direction (not normalized) - normal_direction = get_normal_direction(direction, contravariant_vectors, - i_node, j_node, k_node, element) - - # Coordinates at boundary node - x = get_node_coords(node_coordinates, equations, dg, - i_node, j_node, k_node, element) - - flux_ = boundary_condition(u_inner, 
normal_direction, x, t, surface_flux, equations) - - # Copy flux to element storage in the correct orientation - for v in eachvariable(equations) - surface_flux_values[v, i, j, direction, element] = flux_[v] + @unpack boundaries = cache + @unpack surface_flux_values, node_coordinates, contravariant_vectors = cache.elements + @unpack surface_flux = surface_integral + index_range = eachnode(dg) + + @threaded for local_index in eachindex(boundary_indexing) + # Use the local index to get the global boundary index from the + # pre-sorted list + boundary = boundary_indexing[local_index] + + # Get information on the adjacent element, compute the surface fluxes, + # and store them + element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + direction = indices2direction(node_indices) + + i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], + index_range) + j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], + index_range) + k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], + index_range) + + i_node = i_node_start + j_node = j_node_start + k_node = k_node_start + for j in eachnode(dg) + for i in eachnode(dg) + # Extract solution data from boundary container + u_inner = get_node_vars(boundaries.u, equations, dg, i, j, boundary) + + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction, + contravariant_vectors, + i_node, j_node, k_node, element) + + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations, dg, + i_node, j_node, k_node, element) + + flux_ = boundary_condition(u_inner, normal_direction, x, t, + surface_flux, equations) + + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations) + surface_flux_values[v, i, j, direction, element] = flux_[v] + end + + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j end - - i_node += i_node_step_i - j_node += j_node_step_i - k_node += k_node_step_i - end - i_node += i_node_step_j - j_node += j_node_step_j - k_node += k_node_step_j end - end end - function prolong2mortars!(cache, u, mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack fstar_tmp_threaded = cache - @unpack neighbor_ids, node_indices = cache.mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Copy solution data from the small elements using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
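The `boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations)` call in `calc_boundary_flux!` above fixes the interface that user-supplied boundary conditions must implement: inner trace, outward (non-normalized) normal, boundary coordinates, time, and the interior surface flux. A hedged sketch of a Dirichlet-type condition matching that signature, where `prescribed_state` is a hypothetical helper:

    # Build an outer state from prescribed data and couple it with the same
    # numerical surface flux that is used in the interior.
    function boundary_condition_dirichlet(u_inner, normal_direction, x, t,
                                          surface_flux, equations)
        u_outer = prescribed_state(x, t, equations)  # hypothetical target state
        return surface_flux(u_inner, u_outer, normal_direction, equations)
    end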
- small_indices = node_indices[1, mortar] - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for position in 1:4 - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - element = neighbor_ids[position, mortar] - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u[1, v, position, i, j, mortar] = u[v, i_small, j_small, k_small, element] - end - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i + @unpack fstar_tmp_threaded = cache + @unpack neighbor_ids, node_indices = cache.mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Copy solution data from the small elements using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + small_indices = node_indices[1, mortar] + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for position in 1:4 + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + element = neighbor_ids[position, mortar] + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u[1, v, position, i, j, mortar] = u[v, i_small, + j_small, + k_small, + element] + end + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - end - - - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - # temporary buffer for projections - fstar_tmp = fstar_tmp_threaded[Threads.threadid()] - - # Copy solution of large element face to buffer in the - # correct orientation - large_indices = node_indices[2, mortar] - i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2], index_range) - k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], index_range) - - i_large = i_large_start - j_large = j_large_start - k_large = k_large_start - element = neighbor_ids[5, mortar] - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + # temporary buffer for projections + fstar_tmp = fstar_tmp_threaded[Threads.threadid()] + + # Copy solution of large element face to buffer in the + # correct orientation + large_indices = node_indices[2, mortar] + + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = 
index_to_start_step_3d(large_indices[2], + index_range) + k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], + index_range) + + i_large = i_large_start + j_large = j_large_start + k_large = k_large_start + element = neighbor_ids[5, mortar] + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + end + i_large += i_large_step_i + j_large += j_large_step_i + k_large += k_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j + k_large += k_large_step_j end - i_large += i_large_step_i - j_large += j_large_step_i - k_large += k_large_step_i - end - i_large += i_large_step_j - j_large += j_large_step_j - k_large += k_large_step_j - end - # Interpolate large element face data from buffer to small face locations - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 3, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 4, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - end + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, :, mortar), + mortar_l2.forward_lower, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, :, mortar), + mortar_l2.forward_upper, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 3, :, :, mortar), + mortar_l2.forward_lower, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 4, :, :, mortar), + mortar_l2.forward_upper, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + end - return nothing + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::P4estMesh{3}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_threaded, fstar_tmp_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = fstar_threaded[Threads.threadid()] - fstar_tmp = fstar_tmp_threaded[Threads.threadid()] - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for position in 1:4 - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - element = neighbor_ids[position, mortar] - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction on the small element. 
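The four `multiply_dimensionwise!` calls above interpolate the large-face buffer to the four small-face positions by applying 1D mortar operators (`forward_lower`/`forward_upper`) in each face direction. A dense-loop sketch of that tensor-product action, assuming face data is laid out as `(variable, i, j)`:

    # Apply 1D operators A_i and A_j to the two face directions of u.
    function interpolate_face(A_i, A_j, u)
        v = size(u, 1)
        out = zeros(v, size(A_i, 1), size(A_j, 1))
        for q in axes(A_j, 1), p in axes(A_i, 1)
            for jj in axes(u, 3), ii in axes(u, 2)
                for w in 1:v
                    out[w, p, q] += A_i[p, ii] * A_j[q, jj] * u[w, ii, jj]
                end
            end
        end
        return out
    end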
- # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, k_small, element) - - calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - i, j) - - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i + @unpack neighbor_ids, node_indices = cache.mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_threaded, fstar_tmp_threaded = cache + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = fstar_threaded[Threads.threadid()] + fstar_tmp = fstar_tmp_threaded[Threads.threadid()] + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for position in 1:4 + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + element = neighbor_ids[position, mortar] + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, k_small, + element) + + calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + i, j) + + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer, fstar_tmp) - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer, fstar_tmp) + end - return nothing + return nothing end # Inlined version of the mortar flux computation on small elements for conservation fluxes @@ -550,15 +595,17 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, i_node_index, j_node_index) - @unpack u = cache.mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, j_node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, + j_node_index, mortar_index) - flux = surface_flux(u_ll, u_rr, 
normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, position_index) + # Copy flux to buffer + set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, + position_index) end # Inlined version of the mortar flux computation on small elements for conservation fluxes @@ -569,154 +616,164 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, i_node_index, j_node_index) - @unpack u = cache.mortars - surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u = cache.mortars + surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, j_node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, + j_node_index, mortar_index) - # Compute conservative flux - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + # Compute conservative flux + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Compute nonconservative flux and add it to the flux scaled by a factor of 0.5 based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - flux_plus_noncons = flux + 0.5 * noncons + # Compute nonconservative flux and add it to the flux scaled by a factor of 0.5 based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, + equations) + flux_plus_noncons = flux + 0.5 * noncons - # Copy to buffer - set_node_vars!(fstar, flux_plus_noncons, equations, dg, i_node_index, j_node_index, position_index) + # Copy to buffer + set_node_vars!(fstar, flux_plus_noncons, equations, dg, i_node_index, j_node_index, + position_index) end - @inline function mortar_fluxes_to_elements!(surface_flux_values, mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, - dg::DGSEM, cache, mortar, fstar, u_buffer, fstar_tmp) - @unpack neighbor_ids, node_indices = cache.mortars - index_range = eachnode(dg) - - # Copy solution small to small - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - for position in 1:4 - element = neighbor_ids[position, mortar] - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j, position] - end + dg::DGSEM, cache, mortar, fstar, u_buffer, + fstar_tmp) + @unpack neighbor_ids, node_indices = cache.mortars + index_range = eachnode(dg) + + # Copy solution small to small + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + for position in 1:4 + element = neighbor_ids[position, mortar] + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j, + position] + end + end end - end - - # Project small fluxes to large element. 
- multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_lower, - view(fstar, .., 1), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_lower, - view(fstar, .., 2), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_upper, - view(fstar, .., 3), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_upper, - view(fstar, .., 4), - fstar_tmp) - - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are four times as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 4 to obtain the flux of the large element. - u_buffer .*= -4 - - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. - # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. - large_element = neighbor_ids[5, mortar] - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - large_surface_indices = surface_indices(large_indices) - - i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], index_range) - - # Note that the indices of the small sides will always run forward but - # the large indices might need to run backwards for flipped sides. - i_large = i_large_start - j_large = j_large_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i_large, j_large, large_direction, large_element] = u_buffer[v, i, j] - end - i_large += i_large_step_i - j_large += j_large_step_i + + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, mortar_l2.reverse_lower, + view(fstar, .., 1), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, mortar_l2.reverse_lower, + view(fstar, .., 2), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, mortar_l2.reverse_upper, + view(fstar, .., 3), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, mortar_l2.reverse_upper, + view(fstar, .., 4), + fstar_tmp) + + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are four times as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 4 to obtain the flux of the large element. + u_buffer .*= -4 + + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. 
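The `-4` rescaling explained in the comments here can be checked with a one-node toy computation, assuming isotropic 2:1 refinement so that the large element's non-normalized normal vectors are four times as long as the small elements':

    # Toy check of the -4 factor in u_buffer .*= -4.
    small_normal = 0.25                  # magnitude of a small-face normal (hypothetical)
    large_normal = 4 * small_normal      # four times as long on the coarse side
    flux_per_unit_normal = 2.0
    flux_small = flux_per_unit_normal * small_normal
    # Projected flux on the large face: flip the orientation and rescale.
    flux_large = -4 * flux_small
    flux_large == -flux_per_unit_normal * large_normal   # true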
+ large_element = neighbor_ids[5, mortar] + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + large_surface_indices = surface_indices(large_indices) + + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], + index_range) + + # Note that the indices of the small sides will always run forward but + # the large indices might need to run backwards for flipped sides. + i_large = i_large_start + j_large = j_large_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i_large, j_large, large_direction, large_element] = u_buffer[v, + i, + j] + end + i_large += i_large_step_i + j_large += j_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j end - i_large += i_large_step_j - j_large += j_large_step_j - end - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::P4estMesh{3}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = ( - du[v, 1, l, m, element] + surface_flux_values[v, l, m, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, m, element] = ( - du[v, nnodes(dg), l, m, element] + surface_flux_values[v, l, m, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, m, element] = ( - du[v, l, 1, m, element] + surface_flux_values[v, l, m, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), m, element] = ( - du[v, l, nnodes(dg), m, element] + surface_flux_values[v, l, m, 4, element] * factor_2) - - # surface at -z - du[v, l, m, 1, element] = ( - du[v, l, m, 1, element] + surface_flux_values[v, l, m, 5, element] * factor_1) - - # surface at +z - du[v, l, m, nnodes(dg), element] = ( - du[v, l, m, nnodes(dg), element] + surface_flux_values[v, l, m, 6, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
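The explicit-assignment style mentioned in the comment above matters because `@muladd` (from MuladdMacro.jl) rewrites `a + b * c` patterns into `muladd` calls, and it can only do so when the full expression is spelled out rather than hidden behind `+=`. A minimal sketch:

    using MuladdMacro

    # `du[i] = du[i] + f[i] * w` is rewritten to `du[i] = muladd(f[i], w, du[i])`,
    # allowing the compiler to emit fused multiply-add instructions.
    @muladd function add_scaled!(du, f, w)
        for i in eachindex(du, f)
            du[i] = du[i] + f[i] * w
        end
        return du
    end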
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] + + surface_flux_values[v, l, m, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] + + surface_flux_values[v, l, m, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] + + surface_flux_values[v, l, m, 5, element] * + factor_1) + + # surface at +z + du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + + surface_flux_values[v, l, m, 6, + element] * + factor_2) + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index 5c77247ac6d..13bf2a1a2eb 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -3,207 +3,236 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function rhs!(du, u, t, mesh::ParallelP4estMesh{3}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Start to receive MPI data - @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) - - # Prolong solution to MPI interfaces - @trixi_timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to MPI mortars - @trixi_timeit timer() "prolong2mpimortars" prolong2mpimortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Start to send MPI data - @trixi_timeit timer() "start MPI send" start_mpi_send!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - 
have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Finish to receive MPI data - @trixi_timeit timer() "finish MPI receive" finish_mpi_receive!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Calculate MPI interface fluxes - @trixi_timeit timer() "MPI interface flux" calc_mpi_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Calculate MPI mortar fluxes - @trixi_timeit timer() "MPI mortar flux" calc_mpi_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - # Finish to send MPI data - @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) - - return nothing -end + # Start to receive MPI data + @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) + + # Prolong solution to MPI interfaces + @trixi_timeit timer() "prolong2mpiinterfaces" begin + prolong2mpiinterfaces!(cache, u, mesh, equations, dg.surface_integral, dg) + end + # Prolong solution to MPI mortars + @trixi_timeit timer() "prolong2mpimortars" begin + prolong2mpimortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Start to send MPI data + @trixi_timeit timer() "start MPI send" begin + start_mpi_send!(cache.mpi_cache, mesh, equations, dg, cache) + end + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Finish to receive MPI data + @trixi_timeit timer() "finish MPI receive" begin + finish_mpi_receive!(cache.mpi_cache, mesh, equations, dg, cache) + end + + # Calculate MPI interface fluxes + @trixi_timeit 
timer() "MPI interface flux" begin + calc_mpi_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Calculate MPI mortar fluxes + @trixi_timeit timer() "MPI mortar flux" begin + calc_mpi_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + # Finish to send MPI data + @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) + + return nothing +end function prolong2mpiinterfaces!(cache, u, mesh::ParallelP4estMesh{3}, equations, surface_integral, dg::DG) - @unpack mpi_interfaces = cache - index_range = eachnode(dg) - - @threaded for interface in eachmpiinterface(dg, cache) - # Copy solution data from the local element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - local_side = mpi_interfaces.local_sides[interface] - local_element = mpi_interfaces.local_neighbor_ids[interface] - local_indices = mpi_interfaces.node_indices[interface] - - i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], index_range) - j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], index_range) - k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], index_range) - - i_element = i_element_start - j_element = j_element_start - k_element = k_element_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - mpi_interfaces.u[local_side, v, i, j, interface] = u[v, i_element, j_element, k_element, local_element] + @unpack mpi_interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachmpiinterface(dg, cache) + # Copy solution data from the local element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. 
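The timed sections in `rhs!` above follow the usual TimerOutputs.jl pattern of naming each stage and wrapping it in a block; a minimal sketch under that assumption (Trixi's `@trixi_timeit timer() "name" begin ... end` plays the same role):

    using TimerOutputs

    const to = TimerOutput()
    @timeit to "volume integral" begin
        s = sum(abs2, rand(10^6))   # stand-in for the actual kernel
    end
    print_timer(to)                 # reports time and allocations per section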
+ local_side = mpi_interfaces.local_sides[interface] + local_element = mpi_interfaces.local_neighbor_ids[interface] + local_indices = mpi_interfaces.node_indices[interface] + + i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], + index_range) + j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], + index_range) + k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], + index_range) + + i_element = i_element_start + j_element = j_element_start + k_element = k_element_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + mpi_interfaces.u[local_side, v, i, j, interface] = u[v, i_element, + j_element, + k_element, + local_element] + end + i_element += i_element_step_i + j_element += j_element_step_i + k_element += k_element_step_i + end + i_element += i_element_step_j + j_element += j_element_step_j + k_element += k_element_step_j end - i_element += i_element_step_i - j_element += j_element_step_i - k_element += k_element_step_i - end - i_element += i_element_step_j - j_element += j_element_step_j - k_element += k_element_step_j end - end - return nothing + return nothing end - function calc_mpi_interface_flux!(surface_flux_values, mesh::ParallelP4estMesh{3}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - - @threaded for interface in eachmpiinterface(dg, cache) - # Get element and side index information on the local element - local_element = local_neighbor_ids[interface] - local_indices = node_indices[interface] - local_direction = indices2direction(local_indices) - local_side = local_sides[interface] - - # Create the local i,j,k indexing on the local element used to pull normal direction information - i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], index_range) - j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], index_range) - k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], index_range) - - i_element = i_element_start - j_element = j_element_start - k_element = k_element_start - - # Initiate the node indices to be used in the surface for loop, - # the surface flux storage must be indexed in alignment with the local element indexing - local_surface_indices = surface_indices(local_indices) - i_surface_start, i_surface_step_i, i_surface_step_j = index_to_start_step_3d(local_surface_indices[1], index_range) - j_surface_start, j_surface_step_i, j_surface_step_j = index_to_start_step_3d(local_surface_indices[2], index_range) - i_surface = i_surface_start - j_surface = j_surface_start - - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction on the local element - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
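At an MPI interface only one side is owned locally, and `local_sides` records whether the local element is the primary (1) or secondary (2) side. The inlined flux kernel a bit further down negates both the normal and the result for side 2, since the interface normal is oriented with the primary element; a short sketch makes the convention explicit:

    # Flux seen from the local element, following the side convention below.
    function local_flux(surface_flux, u_ll, u_rr, normal, local_side)
        if local_side == 1
            return surface_flux(u_ll, u_rr, normal)
        else # local_side == 2: the local element's outward normal is -normal
            return -surface_flux(u_ll, u_rr, -normal)
        end
    end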
- normal_direction = get_normal_direction(local_direction, contravariant_vectors, - i_element, j_element, k_element, - local_element) - - calc_mpi_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, local_side, - i_surface, j_surface, local_direction, local_element) - - # Increment local element indices to pull the normal direction - i_element += i_element_step_i - j_element += j_element_step_i - k_element += k_element_step_i - # Increment the surface node indices along the local element - i_surface += i_surface_step_i - j_surface += j_surface_step_i - end - # Increment local element indices to pull the normal direction - i_element += i_element_step_j - j_element += j_element_step_j - k_element += k_element_step_j - # Increment the surface node indices along the local element - i_surface += i_surface_step_j - j_surface += j_surface_step_j + @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + @threaded for interface in eachmpiinterface(dg, cache) + # Get element and side index information on the local element + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + + # Create the local i,j,k indexing on the local element used to pull normal direction information + i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], + index_range) + j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], + index_range) + k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], + index_range) + + i_element = i_element_start + j_element = j_element_start + k_element = k_element_start + + # Initiate the node indices to be used in the surface for loop, + # the surface flux storage must be indexed in alignment with the local element indexing + local_surface_indices = surface_indices(local_indices) + i_surface_start, i_surface_step_i, i_surface_step_j = index_to_start_step_3d(local_surface_indices[1], + index_range) + j_surface_start, j_surface_step_i, j_surface_step_j = index_to_start_step_3d(local_surface_indices[2], + index_range) + i_surface = i_surface_start + j_surface = j_surface_start + + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction on the local element + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. 
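The comment above ("contravariant vectors at interfaces in negative coordinate direction are pointing inwards") suggests the sign handling inside `get_normal_direction`. A hypothetical simplification of that logic, consistent with the 1..6 face numbering assumed earlier:

    # Outward normal from a stored contravariant vector: odd directions
    # (1, 3, 5 = the -x, -y, -z faces) need a sign flip. The real function
    # also selects the i-, j- or k-contravariant vector at the given node.
    function outward_normal(direction, contravariant_vector)
        return isodd(direction) ? -contravariant_vector : contravariant_vector
    end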
+ normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_element, j_element, k_element, + local_element) + + calc_mpi_interface_flux!(surface_flux_values, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache, + interface, normal_direction, + i, j, local_side, + i_surface, j_surface, local_direction, + local_element) + + # Increment local element indices to pull the normal direction + i_element += i_element_step_i + j_element += j_element_step_i + k_element += k_element_step_i + # Increment the surface node indices along the local element + i_surface += i_surface_step_i + j_surface += j_surface_step_i + end + # Increment local element indices to pull the normal direction + i_element += i_element_step_j + j_element += j_element_step_j + k_element += k_element_step_j + # Increment the surface node indices along the local element + i_surface += i_surface_step_j + j_surface += j_surface_step_j + end end - end - return nothing + return nothing end # Inlined version of the interface flux computation for conservation laws @@ -212,181 +241,198 @@ end nonconservative_terms::False, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, - interface_i_node_index, interface_j_node_index, local_side, + interface_i_node_index, + interface_j_node_index, local_side, surface_i_node_index, surface_j_node_index, local_direction_index, local_element_index) - @unpack u = cache.mpi_interfaces - @unpack surface_flux = surface_integral - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, - interface_i_node_index, interface_j_node_index, interface_index) - - if local_side == 1 - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - else # local_side == 2 - flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) - end - - for v in eachvariable(equations) - surface_flux_values[v, surface_i_node_index, surface_j_node_index, - local_direction_index, local_element_index] = flux_[v] - end -end + @unpack u = cache.mpi_interfaces + @unpack surface_flux = surface_integral + u_ll, u_rr = get_surface_node_vars(u, equations, dg, + interface_i_node_index, interface_j_node_index, + interface_index) + + if local_side == 1 + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + else # local_side == 2 + flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) + end + + for v in eachvariable(equations) + surface_flux_values[v, surface_i_node_index, surface_j_node_index, + local_direction_index, local_element_index] = flux_[v] + end +end function prolong2mpimortars!(cache, u, mesh::ParallelP4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack node_indices = cache.mpi_mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - - # Get start value and step size for indices on both sides to get the correct face - # and orientation - small_indices = node_indices[1, mortar] - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - large_indices = node_indices[2, mortar] - i_large_start, i_large_step_i, i_large_step_j = 
index_to_start_step_3d(large_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2], index_range) - k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], index_range) - - - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position == 5 # -> large element - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - # temporary buffer for projections - fstar_tmp = cache.fstar_tmp_threaded[Threads.threadid()] - - i_large = i_large_start - j_large = j_large_start - k_large = k_large_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + @unpack node_indices = cache.mpi_mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] + + # Get start value and step size for indices on both sides to get the correct face + # and orientation + small_indices = node_indices[1, mortar] + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + large_indices = node_indices[2, mortar] + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2], + index_range) + k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], + index_range) + + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position == 5 # -> large element + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + # temporary buffer for projections + fstar_tmp = cache.fstar_tmp_threaded[Threads.threadid()] + + i_large = i_large_start + j_large = j_large_start + k_large = k_large_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + end + + i_large += i_large_step_i + j_large += j_large_step_i + k_large += k_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j + k_large += k_large_step_j + end + + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, :, + mortar), + mortar_l2.forward_lower, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, :, + mortar), + mortar_l2.forward_upper, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 3, :, :, + mortar), + mortar_l2.forward_lower, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 4, :, :, + mortar), + mortar_l2.forward_upper, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + else # position in (1, 2, 3, 4) -> small element + # Copy solution data from the small elements 
+ i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mpi_mortars.u[1, v, position, i, j, mortar] = u[v, + i_small, + j_small, + k_small, + element] + end + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - - i_large += i_large_step_i - j_large += j_large_step_i - k_large += k_large_step_i - end - i_large += i_large_step_j - j_large += j_large_step_j - k_large += k_large_step_j end - - # Interpolate large element face data from buffer to small face locations - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 3, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 4, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - else # position in (1, 2, 3, 4) -> small element - # Copy solution data from the small elements - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mpi_mortars.u[1, v, position, i, j, mortar] = u[v, i_small, j_small, k_small, element] - end - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i - end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - end end - end - return nothing + return nothing end - function calc_mpi_mortar_flux!(surface_flux_values, mesh::ParallelP4estMesh{3}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_threaded, fstar_tmp_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = fstar_threaded[Threads.threadid()] - fstar_tmp = fstar_tmp_threaded[Threads.threadid()] - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for position in 1:4 - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction on the small element. 
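# The per-node kernel invoked below boils down to surface_flux(u_ll, u_rr,
# normal_direction, equations): a two-point numerical flux evaluated in the
# direction of the non-normalized outward normal. A minimal sketch for scalar
# linear advection with velocity vector `a`, using a local Lax-Friedrichs
# (Rusanov) flux; this mirrors only the call signature, not Trixi's
# implementation of any concrete flux.
function rusanov_flux_sketch(u_ll, u_rr, normal_direction, a)
    a_n = sum(a .* normal_direction)  # advection speed along the normal
    return 0.5 * (a_n * (u_ll + u_rr) - abs(a_n) * (u_rr - u_ll))
end
# e.g. rusanov_flux_sketch(1.0, 0.5, (0.0, 0.0, 2.0), (1.0, 0.0, 0.0)) == 0.0,
# since that advection velocity is tangential to a face with that normal.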
- normal_direction = get_normal_direction(cache.mpi_mortars, i, j, position, mortar) - - calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - i, j) - - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_threaded, fstar_tmp_threaded = cache + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = fstar_threaded[Threads.threadid()] + fstar_tmp = fstar_tmp_threaded[Threads.threadid()] + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for position in 1:4 + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction on the small element. + normal_direction = get_normal_direction(cache.mpi_mortars, i, j, + position, mortar) + + calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + i, j) + + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j end - end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - mpi_mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer, fstar_tmp) - end + mpi_mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer, fstar_tmp) + end - return nothing + return nothing end # Inlined version of the mortar flux computation on small elements for conservation laws @@ -396,96 +442,103 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, i_node_index, j_node_index) - @unpack u = cache.mpi_mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mpi_mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, j_node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, + j_node_index, mortar_index) - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, position_index) + # Copy flux to buffer + set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, + position_index) end - @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values, 
mesh::ParallelP4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, - dg::DGSEM, cache, mortar, fstar, u_buffer, fstar_tmp) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - index_range = eachnode(dg) - - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - large_surface_indices = surface_indices(large_indices) - - i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], index_range) - - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - if position == 5 # -> large element - # Project small fluxes to large element. - multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_lower, - view(fstar, .., 1), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_lower, - view(fstar, .., 2), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_upper, - view(fstar, .., 3), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_upper, - view(fstar, .., 4), - fstar_tmp) - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are four times as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 4 to obtain the flux of the large element. - u_buffer .*= -4 - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. - # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. 
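# Each multiply_dimensionwise! call above applies one 1D mortar operator per
# face direction, i.e. dest[v, i, j] = sum over (ii, jj) of
# A[i, ii] * B[j, jj] * src[v, ii, jj]. A plain-loop reference for that
# contraction (a sketch assuming A and B have identical sizes, not Trixi's
# optimized kernel); the add_multiply_dimensionwise! variants accumulate into
# dest instead of overwriting it.
function tensor_apply(A, B, src)
    nvar = size(src, 1)
    n_out, n_in = size(A, 1), size(A, 2)
    dest = zeros(promote_type(eltype(A), eltype(src)), nvar, n_out, n_out)
    for j in 1:n_out, i in 1:n_out, v in 1:nvar
        for jj in 1:n_in, ii in 1:n_in
            dest[v, i, j] += A[i, ii] * B[j, jj] * src[v, ii, jj]
        end
    end
    return dest
end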
- i_large = i_large_start - j_large = j_large_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i_large, j_large, large_direction, element] = u_buffer[v, i, j] - end - i_large += i_large_step_i - j_large += j_large_step_i - end - i_large += i_large_step_j - j_large += j_large_step_j - end - else # position in (1, 2, 3, 4) -> small element - # Copy solution small to small - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j, position] - end + dg::DGSEM, cache, mortar, fstar, + u_buffer, fstar_tmp) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + index_range = eachnode(dg) + + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + large_surface_indices = surface_indices(large_indices) + + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], + index_range) + + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + if position == 5 # -> large element + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, mortar_l2.reverse_lower, + view(fstar, .., 1), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, + mortar_l2.reverse_lower, + view(fstar, .., 2), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, + mortar_l2.reverse_upper, + view(fstar, .., 3), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, + mortar_l2.reverse_upper, + view(fstar, .., 4), + fstar_tmp) + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are four times as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 4 to obtain the flux of the large element. + u_buffer .*= -4 + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. 
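# A back-of-the-envelope check of the factor -4 used above (illustration
# only): in 3D the large face is covered by four small faces of equal
# reference area, so the area-scaled normal on the large side is four times
# as long as on each small side, and the sign flip turns "outward for the
# small elements" into "outward for the large element".
small_to_large_area_ratio = 1 / 4
sign_flip = -1
scaling = sign_flip / small_to_large_area_ratio   # == -4.0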
+ i_large = i_large_start + j_large = j_large_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i_large, j_large, large_direction, element] = u_buffer[v, + i, + j] + end + i_large += i_large_step_i + j_large += j_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j + end + else # position in (1, 2, 3, 4) -> small element + # Copy solution small to small + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, j, small_direction, element] = fstar[v, + i, + j, + position] + end + end + end end - end end - end - return nothing + return nothing end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 6d3803225a0..ac122d048c1 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -3,175 +3,181 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent mutable struct P4estMPICache{uEltype} - mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} - mpi_send_requests::Vector{MPI.Request} - mpi_recv_requests::Vector{MPI.Request} - n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} - n_elements_global::Int - first_element_global_id::Int + mpi_neighbor_ranks::Vector{Int} + mpi_neighbor_interfaces::Vector{Vector{Int}} + mpi_neighbor_mortars::Vector{Vector{Int}} + mpi_send_buffers::Vector{Vector{uEltype}} + mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_send_requests::Vector{MPI.Request} + mpi_recv_requests::Vector{MPI.Request} + n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} + n_elements_global::Int + first_element_global_id::Int end function P4estMPICache(uEltype) - # MPI communication "just works" for bitstypes only - if !isbitstype(uEltype) - throw(ArgumentError("P4estMPICache only supports bitstypes, $uEltype is not a bitstype.")) - end - - mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_send_requests = Vector{MPI.Request}(undef, 0) - mpi_recv_requests = Vector{MPI.Request}(undef, 0) - n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) - n_elements_global = 0 - first_element_global_id = 0 - - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) -end + # MPI communication "just works" for bitstypes only + if !isbitstype(uEltype) + throw(ArgumentError("P4estMPICache only supports bitstypes, $uEltype is not a bitstype.")) + end -@inline Base.eltype(::P4estMPICache{uEltype}) where uEltype = uEltype + mpi_neighbor_ranks = Vector{Int}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_send_requests = Vector{MPI.Request}(undef, 0) + mpi_recv_requests = 
Vector{MPI.Request}(undef, 0) + n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) + n_elements_global = 0 + first_element_global_id = 0 + + P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) +end +@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype function start_mpi_send!(mpi_cache::P4estMPICache, mesh, equations, dg, cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - n_small_elements = 2^(ndims(mesh)-1) - - for d in 1:length(mpi_cache.mpi_neighbor_ranks) - send_buffer = mpi_cache.mpi_send_buffers[d] - - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - local_side = cache.mpi_interfaces.local_sides[interface] - @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[local_side, .., interface]) - end + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) + n_small_elements = 2^(ndims(mesh) - 1) + + for d in 1:length(mpi_cache.mpi_neighbor_ranks) + send_buffer = mpi_cache.mpi_send_buffers[d] + + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + local_side = cache.mpi_interfaces.local_sides[interface] + @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[local_side, .., + interface]) + end - # Set send_buffer corresponding to mortar data to NaN and overwrite the parts where local - # data exists - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * n_small_elements * 2 * data_size - # `NaN |> eltype(...)` ensures that the NaN's are of the appropriate floating point type - send_buffer[interfaces_data_size+1:interfaces_data_size+mortars_data_size] .= NaN |> eltype(mpi_cache) - - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - index_base = interfaces_data_size + (index - 1) * n_small_elements * 2 * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - - for position in cache.mpi_mortars.local_neighbor_positions[mortar] - first, last = indices[position] - if position > n_small_elements # large element - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[2, :, :, .., mortar]) - else # small element - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[1, :, position, .., mortar]) + # Set send_buffer corresponding to mortar data to NaN and overwrite the parts where local + # data exists + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * + n_small_elements * 2 * data_size + # `NaN |> eltype(...)` ensures that the NaN's are of the appropriate floating point type + send_buffer[(interfaces_data_size + 1):(interfaces_data_size + mortars_data_size)] .= NaN |> + eltype(mpi_cache) + + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + index_base = interfaces_data_size + + (index - 1) * n_small_elements * 2 * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + + for position in cache.mpi_mortars.local_neighbor_positions[mortar] + first, last = indices[position] + if position > n_small_elements # large element + @views send_buffer[first:last] .= 
vec(cache.mpi_mortars.u[2, :, :, + .., + mortar]) + else # small element + @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[1, :, + position, + .., + mortar]) + end + end end - end end - end - # Start sending - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_send_requests[index] = MPI.Isend( - mpi_cache.mpi_send_buffers[index], d, mpi_rank(), mpi_comm()) - end + # Start sending + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_send_requests[index] = MPI.Isend(mpi_cache.mpi_send_buffers[index], + d, mpi_rank(), mpi_comm()) + end - return nothing + return nothing end - function start_mpi_receive!(mpi_cache::P4estMPICache) - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_recv_requests[index] = MPI.Irecv!( - mpi_cache.mpi_recv_buffers[index], d, d, mpi_comm()) - end + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_recv_requests[index] = MPI.Irecv!(mpi_cache.mpi_recv_buffers[index], + d, d, mpi_comm()) + end - return nothing + return nothing end - function finish_mpi_send!(mpi_cache::P4estMPICache) - MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) + MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) end - function finish_mpi_receive!(mpi_cache::P4estMPICache, mesh, equations, dg, cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - n_small_elements = 2^(ndims(mesh)-1) - n_positions = n_small_elements + 1 - - # Start receiving and unpack received data until all communication is finished - d = MPI.Waitany(mpi_cache.mpi_recv_requests) - while d !== nothing - recv_buffer = mpi_cache.mpi_recv_buffers[d] - - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - - if cache.mpi_interfaces.local_sides[interface] == 1 # local element on primary side - @views vec(cache.mpi_interfaces.u[2, .., interface]) .= recv_buffer[first:last] - else # local element at secondary side - @views vec(cache.mpi_interfaces.u[1, .., interface]) .= recv_buffer[first:last] - end - end + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) + n_small_elements = 2^(ndims(mesh) - 1) + n_positions = n_small_elements + 1 - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - index_base = interfaces_data_size + (index - 1) * n_small_elements * 2 * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - - for position in 1:n_positions - # Skip if received data for `position` is NaN as no real data has been sent for the - # corresponding element - if isnan(recv_buffer[Base.first(indices[position])]) - continue + # Start receiving and unpack received data until all communication is finished + d = MPI.Waitany(mpi_cache.mpi_recv_requests) + while d !== nothing + recv_buffer = mpi_cache.mpi_recv_buffers[d] + + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + + if cache.mpi_interfaces.local_sides[interface] == 1 # local element on primary side + @views vec(cache.mpi_interfaces.u[2, .., interface]) .= recv_buffer[first:last] + else # local element at secondary side + @views vec(cache.mpi_interfaces.u[1, .., interface]) .= recv_buffer[first:last] + end end - first, last = indices[position] - if position == n_positions # large element - @views 
vec(cache.mpi_mortars.u[2, :, :, .., mortar]) .= recv_buffer[first:last] - else # small element - @views vec(cache.mpi_mortars.u[1, :, position, .., mortar]) .= recv_buffer[first:last] + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + index_base = interfaces_data_size + + (index - 1) * n_small_elements * 2 * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + + for position in 1:n_positions + # Skip if received data for `position` is NaN as no real data has been sent for the + # corresponding element + if isnan(recv_buffer[Base.first(indices[position])]) + continue + end + + first, last = indices[position] + if position == n_positions # large element + @views vec(cache.mpi_mortars.u[2, :, :, .., mortar]) .= recv_buffer[first:last] + else # small element + @views vec(cache.mpi_mortars.u[1, :, position, .., mortar]) .= recv_buffer[first:last] + end + end end - end - end - d = MPI.Waitany(mpi_cache.mpi_recv_requests) - end + d = MPI.Waitany(mpi_cache.mpi_recv_requests) + end - return nothing + return nothing end - # Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the # data corresponding to the `position` part of a mortar in an MPI buffer. The mortar data must begin # at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small # position (i.e. position 1 or 2). The data corresponding to the large side (i.e. position 3) has # size `2 * data_size`. -@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{2}, index_base, data_size) - return ( - # first, last for local element in position 1 (small element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (small element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # first, last for local element in position 3 (large element) - (index_base + 2 * data_size + 1, - index_base + 4 * data_size), - ) +@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{2}, index_base, + data_size) + return ( + # first, last for local element in position 1 (small element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (small element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # first, last for local element in position 3 (large element) + (index_base + 2 * data_size + 1, + index_base + 4 * data_size)) end # Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the @@ -179,354 +185,401 @@ end # at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small # position (i.e. position 1 to 4). The data corresponding to the large side (i.e. position 5) has # size `4 * data_size`. 
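# Worked example of the 3D layout described above, with made-up values
# data_size = 3 and index_base = 10: four small blocks of data_size entries
# each, then one large block of 4 * data_size entries. Combined with the NaN
# sentinel written in start_mpi_send! and skipped in finish_mpi_receive!,
# only the sub-ranges a rank actually contributes carry real data.
data_size = 3
index_base = 10
# positions 1-4 (small), then position 5 (large):
# (11, 13), (14, 16), (17, 19), (20, 22), (23, 34)
indices = ntuple(p -> p <= 4 ?
                      (index_base + (p - 1) * data_size + 1,
                       index_base + p * data_size) :
                      (index_base + 4 * data_size + 1,
                       index_base + 8 * data_size), 5)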
-@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{3}, index_base, data_size) - return ( - # first, last for local element in position 1 (small element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (small element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # first, last for local element in position 3 (small element) - (index_base + 2 * data_size + 1, - index_base + 3 * data_size), - # first, last for local element in position 4 (small element) - (index_base + 3 * data_size + 1, - index_base + 4 * data_size), - # first, last for local element in position 5 (large element) - (index_base + 4 * data_size + 1, - index_base + 8 * data_size), - ) +@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{3}, index_base, + data_size) + return ( + # first, last for local element in position 1 (small element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (small element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # first, last for local element in position 3 (small element) + (index_base + 2 * data_size + 1, + index_base + 3 * data_size), + # first, last for local element in position 4 (small element) + (index_base + 3 * data_size + 1, + index_base + 4 * data_size), + # first, last for local element in position 5 (large element) + (index_base + 4 * data_size + 1, + index_base + 8 * data_size)) end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache(mesh::ParallelP4estMesh, equations::AbstractEquations, dg::DG, ::Any, ::Type{uEltype}) where {uEltype<:Real} - # Make sure to balance and partition the p4est and create a new ghost layer before creating any - # containers in case someone has tampered with the p4est after creating the mesh - balance!(mesh) - partition!(mesh) - update_ghost_layer!(mesh) - - elements = init_elements(mesh, equations, dg.basis, uEltype) +function create_cache(mesh::ParallelP4estMesh, equations::AbstractEquations, dg::DG, + ::Any, ::Type{uEltype}) where {uEltype <: Real} + # Make sure to balance and partition the p4est and create a new ghost layer before creating any + # containers in case someone has tampered with the p4est after creating the mesh + balance!(mesh) + partition!(mesh) + update_ghost_layer!(mesh) - mpi_interfaces = init_mpi_interfaces(mesh, equations, dg.basis, elements) - mpi_mortars = init_mpi_mortars(mesh, equations, dg.basis, elements) - mpi_cache = init_mpi_cache(mesh, mpi_interfaces, mpi_mortars, - nvariables(equations), nnodes(dg), uEltype) + elements = init_elements(mesh, equations, dg.basis, uEltype) - exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) + mpi_interfaces = init_mpi_interfaces(mesh, equations, dg.basis, elements) + mpi_mortars = init_mpi_mortars(mesh, equations, dg.basis, elements) + mpi_cache = init_mpi_cache(mesh, mpi_interfaces, mpi_mortars, + nvariables(equations), nnodes(dg), uEltype) - interfaces = init_interfaces(mesh, equations, dg.basis, elements) - boundaries = init_boundaries(mesh, equations, dg.basis, elements) - mortars = init_mortars(mesh, equations, dg.basis, elements) + exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) + interfaces = init_interfaces(mesh, equations, dg.basis, elements) + boundaries = init_boundaries(mesh, equations, dg.basis, elements) + mortars = 
init_mortars(mesh, equations, dg.basis, elements) - cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars, mpi_cache) + cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars, + mpi_cache) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (; cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end +function init_mpi_cache(mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, + nnodes, uEltype) + mpi_cache = P4estMPICache(uEltype) + init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, nvars, nnodes, + uEltype) -function init_mpi_cache(mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - mpi_cache = P4estMPICache(uEltype) - init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - - return mpi_cache + return mpi_cache end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = - init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars, mesh) - - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = - init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, - ndims(mesh), nvars, n_nodes, uEltype) - - # Determine local and total number of elements - n_elements_global = Int(unsafe_load(mesh.p4est).global_num_quadrants) - n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, unsafe_load(mesh.p4est).global_first_quadrant, mpi_nranks())), - n_elements_global) |> diff # diff sufficient due to 0-based quad indices - n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) - # Account for 1-based indexing in Julia - first_element_global_id = Int(unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, mpi_rank() + 1)) + 1 - @assert n_elements_global == sum(n_elements_by_rank) "error in total number of elements" - - # TODO reuse existing structures - @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id + mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) + + mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, + mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) + + # Determine local and total number of elements + n_elements_global = Int(unsafe_load(mesh.p4est).global_num_quadrants) + n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, + unsafe_load(mesh.p4est).global_first_quadrant, + mpi_nranks())), + n_elements_global) |> diff # diff sufficient due to 0-based quad indices + n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) + # Account for 1-based indexing in Julia + first_element_global_id = Int(unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + mpi_rank() + 1)) + 1 + @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + + # TODO reuse existing structures + @pack! mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id end -function init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars, mesh::ParallelP4estMesh) - # Let p4est iterate over all interfaces and call init_neighbor_rank_connectivity_iter_face - # to collect connectivity information - iter_face_c = cfunction(init_neighbor_rank_connectivity_iter_face, Val(ndims(mesh))) - user_data = InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, mpi_mortars, mesh) - - iterate_p4est(mesh.p4est, user_data; ghost_layer=mesh.ghost, iter_face_c=iter_face_c) - - # Build proper connectivity data structures from information gathered by iterating over p4est - @unpack global_interface_ids, neighbor_ranks_interface, global_mortar_ids, neighbor_ranks_mortar = user_data - - mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) 
|> sort |> unique - - p = sortperm(global_interface_ids) - neighbor_ranks_interface .= neighbor_ranks_interface[p] - interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] - - p = sortperm(global_mortar_ids) - neighbor_ranks_mortar .= neighbor_ranks_mortar[p] - mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] - - # For each neighbor rank, init connectivity data structures - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - for (index, d) in enumerate(mpi_neighbor_ranks) - mpi_neighbor_interfaces[index] = interface_ids[findall(==(d), neighbor_ranks_interface)] - mpi_neighbor_mortars[index] = mortar_ids[findall(x->(d in x), neighbor_ranks_mortar)] - end +function init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars, + mesh::ParallelP4estMesh) + # Let p4est iterate over all interfaces and call init_neighbor_rank_connectivity_iter_face + # to collect connectivity information + iter_face_c = cfunction(init_neighbor_rank_connectivity_iter_face, Val(ndims(mesh))) + user_data = InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, + mpi_mortars, mesh) + + iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost, + iter_face_c = iter_face_c) + + # Build proper connectivity data structures from information gathered by iterating over p4est + @unpack global_interface_ids, neighbor_ranks_interface, global_mortar_ids, neighbor_ranks_mortar = user_data + + mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |> + sort |> unique + + p = sortperm(global_interface_ids) + neighbor_ranks_interface .= neighbor_ranks_interface[p] + interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + + p = sortperm(global_mortar_ids) + neighbor_ranks_mortar .= neighbor_ranks_mortar[p] + mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] + + # For each neighbor rank, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + for (index, d) in enumerate(mpi_neighbor_ranks) + mpi_neighbor_interfaces[index] = interface_ids[findall(==(d), + neighbor_ranks_interface)] + mpi_neighbor_mortars[index] = mortar_ids[findall(x -> (d in x), + neighbor_ranks_mortar)] + end - # Check that all interfaces were counted exactly once - @assert mapreduce(length, +, mpi_neighbor_interfaces; init=0) == nmpiinterfaces(mpi_interfaces) + # Check that all interfaces were counted exactly once + @assert mapreduce(length, +, mpi_neighbor_interfaces; init = 0) == + nmpiinterfaces(mpi_interfaces) - return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars + return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars end -mutable struct InitNeighborRankConnectivityIterFaceUserData{MPIInterfaces, MPIMortars, Mesh} - interfaces::MPIInterfaces - interface_id::Int - global_interface_ids::Vector{Int} - neighbor_ranks_interface::Vector{Int} - mortars::MPIMortars - mortar_id::Int - global_mortar_ids::Vector{Int} - neighbor_ranks_mortar::Vector{Vector{Int}} - mesh::Mesh +mutable struct InitNeighborRankConnectivityIterFaceUserData{MPIInterfaces, MPIMortars, + Mesh} + interfaces::MPIInterfaces + interface_id::Int + global_interface_ids::Vector{Int} + neighbor_ranks_interface::Vector{Int} + mortars::MPIMortars + mortar_id::Int + global_mortar_ids::Vector{Int} + neighbor_ranks_mortar::Vector{Vector{Int}} + mesh::Mesh end function 
InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, mpi_mortars, mesh) - global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) - neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) - global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) - neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) - - return InitNeighborRankConnectivityIterFaceUserData{ - typeof(mpi_interfaces), typeof(mpi_mortars), typeof(mesh)}( - mpi_interfaces, 1, global_interface_ids, neighbor_ranks_interface, - mpi_mortars, 1, global_mortar_ids, neighbor_ranks_mortar, - mesh) + global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) + global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) + neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) + + return InitNeighborRankConnectivityIterFaceUserData{ + typeof(mpi_interfaces), + typeof(mpi_mortars), + typeof(mesh)}(mpi_interfaces, 1, + global_interface_ids, + neighbor_ranks_interface, + mpi_mortars, 1, + global_mortar_ids, + neighbor_ranks_mortar, + mesh) end function init_neighbor_rank_connectivity_iter_face(info, user_data) - data = unsafe_pointer_to_objref(Ptr{InitNeighborRankConnectivityIterFaceUserData}(user_data)) + data = unsafe_pointer_to_objref(Ptr{InitNeighborRankConnectivityIterFaceUserData}(user_data)) - # Function barrier because the unpacked user_data above is not type-stable - init_neighbor_rank_connectivity_iter_face_inner(info, data) + # Function barrier because the unpacked user_data above is not type-stable + init_neighbor_rank_connectivity_iter_face_inner(info, data) end # 2D -cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{2}) = @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{2}) + @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{3}) = @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{3}) + @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end # Function barrier for type stability function init_neighbor_rank_connectivity_iter_face_inner(info, user_data) - @unpack interfaces, interface_id, global_interface_ids, neighbor_ranks_interface, - mortars, mortar_id, global_mortar_ids, neighbor_ranks_mortar, mesh = user_data - - # Get the global interface/mortar ids and neighbor rank if current face belongs to an MPI - # interface/mortar - if unsafe_load(info).sides.elem_count == 2 # MPI interfaces/mortars have two neighboring elements - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false # No hanging nodes for MPI interfaces - if sides[1].is.full.is_ghost == true - remote_side = 1 - local_side = 2 - elseif sides[2].is.full.is_ghost == true - remote_side = 2 - local_side = 1 - else # both sides are on this rank -> skip since it's a regular interface - return nothing - end - - # Sanity check, current face should belong to current MPI interface - local_tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) # one-based 
indexing - local_quad_id = local_tree.quadrants_offset + sides[local_side].is.full.quadid - @assert interfaces.local_neighbor_ids[interface_id] == local_quad_id + 1 # one-based indexing - - # Get neighbor ID from ghost layer - proc_offsets = unsafe_wrap(Array, - unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, - mpi_nranks() + 1) - ghost_id = sides[remote_side].is.full.quadid # indexes the ghost layer, 0-based - neighbor_rank = findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r+1], - 1:mpi_nranks()) - 1 # MPI ranks are 0-based - neighbor_ranks_interface[interface_id] = neighbor_rank - - # Global interface id is the globally unique quadrant id of the quadrant on the primary - # side (1) multiplied by the number of faces per quadrant plus face - if local_side == 1 - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, mpi_rank() + 1) # one-based indexing - primary_quad_id = offset + local_quad_id - else - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, neighbor_rank + 1) # one-based indexing - primary_quad_id = offset + unsafe_load(sides[1].is.full.quad.p.piggy3.local_num) - end - global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides[1].face - global_interface_ids[interface_id] = global_interface_id - - user_data.interface_id += 1 - else # hanging node - if sides[1].is_hanging == true - hanging_side = 1 - full_side = 2 - else - hanging_side = 2 - full_side = 1 - end - # Verify before accessing is.full / is.hanging - @assert sides[hanging_side].is_hanging == true && sides[full_side].is_hanging == false - - # If all quadrants are locally available, this is a regular mortar -> skip - if sides[full_side].is.full.is_ghost == false && all(sides[hanging_side].is.hanging.is_ghost .== false) - return nothing - end - - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - - # Find small quads that are remote and determine which rank owns them - remote_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== true) - proc_offsets = unsafe_wrap(Array, - unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, - mpi_nranks() + 1) - # indices of small remote quads inside the ghost layer, 0-based - ghost_ids = map(pos -> sides[hanging_side].is.hanging.quadid[pos], remote_small_quad_positions) - neighbor_ranks = map(ghost_ids) do ghost_id - return findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r+1], - 1:mpi_nranks()) - 1 # MPI ranks are 0-based - end - # Determine global quad id of large element to determine global MPI mortar id - # Furthermore, if large element is ghost, add its owner rank to neighbor_ranks - if sides[full_side].is.full.is_ghost == true - ghost_id = sides[full_side].is.full.quadid - large_quad_owner_rank = findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r+1], - 1:mpi_nranks()) - 1 # MPI ranks are 0-based - push!(neighbor_ranks, large_quad_owner_rank) - - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, large_quad_owner_rank + 1) # one-based indexing - large_quad_id = offset + unsafe_load(sides[full_side].is.full.quad.p.piggy3.local_num) - else - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, mpi_rank() + 1) # one-based indexing - large_quad_id = offset + trees[full_side].quadrants_offset + sides[full_side].is.full.quadid - end - neighbor_ranks_mortar[mortar_id] = neighbor_ranks - # Global mortar id is the globally unique quadrant id of the large quadrant multiplied by the - # number of faces 
per quadrant plus face - global_mortar_ids[mortar_id] = 2 * ndims(mesh) * large_quad_id + sides[full_side].face - - user_data.mortar_id += 1 + @unpack interfaces, interface_id, global_interface_ids, neighbor_ranks_interface, + mortars, mortar_id, global_mortar_ids, neighbor_ranks_mortar, mesh = user_data + + # Get the global interface/mortar ids and neighbor rank if current face belongs to an MPI + # interface/mortar + if unsafe_load(info).sides.elem_count == 2 # MPI interfaces/mortars have two neighboring elements + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false # No hanging nodes for MPI interfaces + if sides[1].is.full.is_ghost == true + remote_side = 1 + local_side = 2 + elseif sides[2].is.full.is_ghost == true + remote_side = 2 + local_side = 1 + else # both sides are on this rank -> skip since it's a regular interface + return nothing + end + + # Sanity check, current face should belong to current MPI interface + local_tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) # one-based indexing + local_quad_id = local_tree.quadrants_offset + + sides[local_side].is.full.quadid + @assert interfaces.local_neighbor_ids[interface_id] == local_quad_id + 1 # one-based indexing + + # Get neighbor ID from ghost layer + proc_offsets = unsafe_wrap(Array, + unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, + mpi_nranks() + 1) + ghost_id = sides[remote_side].is.full.quadid # indexes the ghost layer, 0-based + neighbor_rank = findfirst(r -> proc_offsets[r] <= ghost_id < + proc_offsets[r + 1], + 1:mpi_nranks()) - 1 # MPI ranks are 0-based + neighbor_ranks_interface[interface_id] = neighbor_rank + + # Global interface id is the globally unique quadrant id of the quadrant on the primary + # side (1) multiplied by the number of faces per quadrant plus face + if local_side == 1 + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + mpi_rank() + 1) # one-based indexing + primary_quad_id = offset + local_quad_id + else + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + neighbor_rank + 1) # one-based indexing + primary_quad_id = offset + + unsafe_load(sides[1].is.full.quad.p.piggy3.local_num) + end + global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides[1].face + global_interface_ids[interface_id] = global_interface_id + + user_data.interface_id += 1 + else # hanging node + if sides[1].is_hanging == true + hanging_side = 1 + full_side = 2 + else + hanging_side = 2 + full_side = 1 + end + # Verify before accessing is.full / is.hanging + @assert sides[hanging_side].is_hanging == true && + sides[full_side].is_hanging == false + + # If all quadrants are locally available, this is a regular mortar -> skip + if sides[full_side].is.full.is_ghost == false && + all(sides[hanging_side].is.hanging.is_ghost .== false) + return nothing + end + + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + + # Find small quads that are remote and determine which rank owns them + remote_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== + true) + proc_offsets = unsafe_wrap(Array, + unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, + mpi_nranks() + 1) + # indices of small remote quads inside the ghost layer, 0-based + ghost_ids = map(pos -> sides[hanging_side].is.hanging.quadid[pos], + remote_small_quad_positions) + neighbor_ranks = map(ghost_ids) do ghost_id + 
return findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1], + 1:mpi_nranks()) - 1 # MPI ranks are 0-based + end + # Determine global quad id of large element to determine global MPI mortar id + # Furthermore, if large element is ghost, add its owner rank to neighbor_ranks + if sides[full_side].is.full.is_ghost == true + ghost_id = sides[full_side].is.full.quadid + large_quad_owner_rank = findfirst(r -> proc_offsets[r] <= ghost_id < + proc_offsets[r + 1], + 1:mpi_nranks()) - 1 # MPI ranks are 0-based + push!(neighbor_ranks, large_quad_owner_rank) + + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + large_quad_owner_rank + 1) # one-based indexing + large_quad_id = offset + + unsafe_load(sides[full_side].is.full.quad.p.piggy3.local_num) + else + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + mpi_rank() + 1) # one-based indexing + large_quad_id = offset + trees[full_side].quadrants_offset + + sides[full_side].is.full.quadid + end + neighbor_ranks_mortar[mortar_id] = neighbor_ranks + # Global mortar id is the globally unique quadrant id of the large quadrant multiplied by the + # number of faces per quadrant plus face + global_mortar_ids[mortar_id] = 2 * ndims(mesh) * large_quad_id + + sides[full_side].face + + user_data.mortar_id += 1 + end end - end - return nothing + return nothing end - # Exchange normal directions of small elements of the MPI mortars. They are needed on all involved # MPI ranks to calculate the mortar fluxes. -function exchange_normal_directions!(mpi_mortars, mpi_cache, mesh::ParallelP4estMesh, n_nodes) - RealT = real(mesh) - n_dims = ndims(mesh) - @unpack mpi_neighbor_mortars, mpi_neighbor_ranks = mpi_cache - n_small_elements = 2^(n_dims-1) - data_size = n_nodes^(n_dims - 1) * n_dims - - # Create buffers and requests - send_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) - recv_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) - for index in 1:length(mpi_neighbor_mortars) - send_buffers[index] = Vector{RealT}(undef, length(mpi_neighbor_mortars[index]) * n_small_elements * data_size) - send_buffers[index] .= NaN |> RealT - recv_buffers[index] = Vector{RealT}(undef, length(mpi_neighbor_mortars[index]) * n_small_elements * data_size) - recv_buffers[index] .= NaN |> RealT - end - send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) - recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) - - # Fill send buffers - for d in 1:length(mpi_neighbor_ranks) - send_buffer = send_buffers[d] - - for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) - index_base = (index - 1) * n_small_elements * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - for position in mpi_mortars.local_neighbor_positions[mortar] - if position <= n_small_elements # element is small - first, last = indices[position] - @views send_buffer[first:last] .= vec(mpi_mortars.normal_directions[:, .., position, mortar]) - end - end +function exchange_normal_directions!(mpi_mortars, mpi_cache, mesh::ParallelP4estMesh, + n_nodes) + RealT = real(mesh) + n_dims = ndims(mesh) + @unpack mpi_neighbor_mortars, mpi_neighbor_ranks = mpi_cache + n_small_elements = 2^(n_dims - 1) + data_size = n_nodes^(n_dims - 1) * n_dims + + # Create buffers and requests + send_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) + recv_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) + for index in 1:length(mpi_neighbor_mortars) + send_buffers[index] 
= Vector{RealT}(undef, + length(mpi_neighbor_mortars[index]) * + n_small_elements * data_size) + send_buffers[index] .= NaN |> RealT + recv_buffers[index] = Vector{RealT}(undef, + length(mpi_neighbor_mortars[index]) * + n_small_elements * data_size) + recv_buffers[index] .= NaN |> RealT end - end - - # Start data exchange - for (index, d) in enumerate(mpi_neighbor_ranks) - send_requests[index] = MPI.Isend(send_buffers[index], d, mpi_rank(), mpi_comm()) - recv_requests[index] = MPI.Irecv!(recv_buffers[index], d, d, mpi_comm()) - end - - # Unpack data from receive buffers - d = MPI.Waitany(recv_requests) - while d !== nothing - recv_buffer = recv_buffers[d] - - for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) - index_base = (index - 1) * n_small_elements * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - for position in 1:n_small_elements - # Skip if received data for `position` is NaN as no real data has been sent for the - # corresponding element - if isnan(recv_buffer[Base.first(indices[position])]) - continue + send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) + recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) + + # Fill send buffers + for d in 1:length(mpi_neighbor_ranks) + send_buffer = send_buffers[d] + + for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) + index_base = (index - 1) * n_small_elements * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + for position in mpi_mortars.local_neighbor_positions[mortar] + if position <= n_small_elements # element is small + first, last = indices[position] + @views send_buffer[first:last] .= vec(mpi_mortars.normal_directions[:, + .., + position, + mortar]) + end + end end + end - first, last = indices[position] - @views vec(mpi_mortars.normal_directions[:, .., position, mortar]) .= recv_buffer[first:last] - end + # Start data exchange + for (index, d) in enumerate(mpi_neighbor_ranks) + send_requests[index] = MPI.Isend(send_buffers[index], d, mpi_rank(), mpi_comm()) + recv_requests[index] = MPI.Irecv!(recv_buffers[index], d, d, mpi_comm()) end + # Unpack data from receive buffers d = MPI.Waitany(recv_requests) - end + while d !== nothing + recv_buffer = recv_buffers[d] + + for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) + index_base = (index - 1) * n_small_elements * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + for position in 1:n_small_elements + # Skip if received data for `position` is NaN as no real data has been sent for the + # corresponding element + if isnan(recv_buffer[Base.first(indices[position])]) + continue + end + + first, last = indices[position] + @views vec(mpi_mortars.normal_directions[:, .., position, mortar]) .= recv_buffer[first:last] + end + end + + d = MPI.Waitany(recv_requests) + end - # Wait for communication to finish - MPI.Waitall(send_requests, MPI.Status) + # Wait for communication to finish + MPI.Waitall(send_requests, MPI.Status) - return nothing + return nothing end - # Get normal direction of MPI mortar @inline function get_normal_direction(mpi_mortars::P4estMPIMortarContainer, indices...) 
- SVector(ntuple(@inline(dim -> mpi_mortars.normal_directions[dim, indices...]), - Val(ndims(mpi_mortars)))) + SVector(ntuple(@inline(dim->mpi_mortars.normal_directions[dim, indices...]), + Val(ndims(mpi_mortars)))) end - include("dg_2d_parallel.jl") include("dg_3d_parallel.jl") - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_structured/containers.jl b/src/solvers/dgsem_structured/containers.jl index a44f2b3c88c..41eabf7c6bf 100644 --- a/src/solvers/dgsem_structured/containers.jl +++ b/src/solvers/dgsem_structured/containers.jl @@ -3,56 +3,67 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent - -struct ElementContainer{NDIMS, RealT<:Real, uEltype<:Real, NDIMSP1, NDIMSP2, NDIMSP3} - # Physical coordinates at each node - node_coordinates ::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] - # ID of neighbor element in negative direction in orientation - left_neighbors ::Array{Int, 2} # [orientation, elements] - # Jacobian matrix of the transformation - # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix ::Array{RealT, NDIMSP3} - # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors ::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] - # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian ::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] - # Buffer for calculated surface flux - surface_flux_values ::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] +struct ElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, NDIMSP2, NDIMSP3 + } + # Physical coordinates at each node + node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + # ID of neighbor element in negative direction in orientation + left_neighbors::Array{Int, 2} # [orientation, elements] + # Jacobian matrix of the transformation + # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... 
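    # (Illustration, assuming the common convention jacobian_matrix[i, j, ...] =
    # ∂x_i/∂ξ_j: in 2D, jacobian_matrix[1, 2, node_i, node_j, element] would
    # hold ∂x/∂η at that node, and the contravariant vectors Ja^i_n = J * ∂ξ_i/∂x_n
    # follow from the inverse of this matrix scaled by its determinant J.)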
+ jacobian_matrix::Array{RealT, NDIMSP3} + # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) + contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) + inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + # Buffer for calculated surface flux + surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] end - # Create element container and initialize element data function init_elements(mesh::StructuredMesh{NDIMS, RealT}, equations::AbstractEquations, - basis, ::Type{uEltype}) where {NDIMS, RealT<:Real, uEltype<:Real} + basis, + ::Type{uEltype}) where {NDIMS, RealT <: Real, uEltype <: Real} + nelements = prod(size(mesh)) + node_coordinates = Array{RealT, NDIMS + 2}(undef, NDIMS, + ntuple(_ -> nnodes(basis), NDIMS)..., + nelements) + left_neighbors = Array{Int, 2}(undef, NDIMS, nelements) + jacobian_matrix = Array{RealT, NDIMS + 3}(undef, NDIMS, NDIMS, + ntuple(_ -> nnodes(basis), NDIMS)..., + nelements) + contravariant_vectors = similar(jacobian_matrix) + inverse_jacobian = Array{RealT, NDIMS + 1}(undef, + ntuple(_ -> nnodes(basis), NDIMS)..., + nelements) + surface_flux_values = Array{uEltype, NDIMS + 2}(undef, nvariables(equations), + ntuple(_ -> nnodes(basis), + NDIMS - 1)..., NDIMS * 2, + nelements) - nelements = prod(size(mesh)) - node_coordinates = Array{RealT, NDIMS+2}(undef, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements) - left_neighbors = Array{Int, 2}(undef, NDIMS, nelements) - jacobian_matrix = Array{RealT, NDIMS+3}(undef, NDIMS, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements) - contravariant_vectors = similar(jacobian_matrix) - inverse_jacobian = Array{RealT, NDIMS+1}(undef, ntuple(_ -> nnodes(basis), NDIMS)..., nelements) - surface_flux_values = Array{uEltype, NDIMS+2}(undef, nvariables(equations), - ntuple(_ -> nnodes(basis), NDIMS-1)..., NDIMS*2, nelements) + elements = ElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, NDIMS + 3}(node_coordinates, + left_neighbors, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values) - elements = ElementContainer{NDIMS, RealT, uEltype, NDIMS+1, NDIMS+2, NDIMS+3}( - node_coordinates, left_neighbors, jacobian_matrix, contravariant_vectors, - inverse_jacobian, surface_flux_values) - - init_elements!(elements, mesh, basis) - return elements + init_elements!(elements, mesh, basis) + return elements end @inline nelements(elements::ElementContainer) = size(elements.left_neighbors, 2) -@inline Base.ndims(::ElementContainer{NDIMS}) where NDIMS = NDIMS - -Base.eltype(::ElementContainer{NDIMS, RealT, uEltype}) where {NDIMS, RealT, uEltype} = uEltype +@inline Base.ndims(::ElementContainer{NDIMS}) where {NDIMS} = NDIMS +function Base.eltype(::ElementContainer{NDIMS, RealT, uEltype}) where {NDIMS, RealT, + uEltype} + uEltype +end include("containers_1d.jl") include("containers_2d.jl") include("containers_3d.jl") - - end # @muladd diff --git a/src/solvers/dgsem_structured/containers_1d.jl b/src/solvers/dgsem_structured/containers_1d.jl index 97955dcec30..1a1bb183cb3 100644 --- a/src/solvers/dgsem_structured/containers_1d.jl +++ b/src/solvers/dgsem_structured/containers_1d.jl @@ -3,84 +3,83 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::StructuredMesh{1}, basis::LobattoLegendreBasis) - @unpack node_coordinates, left_neighbors, - jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, left_neighbors, + jacobian_matrix, contravariant_vectors, inverse_jacobian = elements - # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant - for cell_x in 1:size(mesh, 1) - calc_node_coordinates!(node_coordinates, cell_x, mesh.mapping, mesh, basis) + # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant + for cell_x in 1:size(mesh, 1) + calc_node_coordinates!(node_coordinates, cell_x, mesh.mapping, mesh, basis) - calc_jacobian_matrix!(jacobian_matrix, cell_x, node_coordinates, basis) + calc_jacobian_matrix!(jacobian_matrix, cell_x, node_coordinates, basis) - calc_inverse_jacobian!(inverse_jacobian, cell_x, jacobian_matrix) - end + calc_inverse_jacobian!(inverse_jacobian, cell_x, jacobian_matrix) + end - # Contravariant vectors don't make sense in 1D, they would be identical to inverse_jacobian - fill!(contravariant_vectors, NaN) + # Contravariant vectors don't make sense in 1D, they would be identical to inverse_jacobian + fill!(contravariant_vectors, NaN) - initialize_left_neighbor_connectivity!(left_neighbors, mesh) + initialize_left_neighbor_connectivity!(left_neighbors, mesh) - return nothing + return nothing end - # Calculate physical coordinates to which every node of the reference element is mapped # `mesh.mapping` is passed as an additional argument for type stability (function barrier) -function calc_node_coordinates!(node_coordinates, cell_x, mapping, mesh::StructuredMesh{1}, +function calc_node_coordinates!(node_coordinates, cell_x, mapping, + mesh::StructuredMesh{1}, basis::LobattoLegendreBasis) - @unpack nodes = basis + @unpack nodes = basis - # Get cell length in reference mesh - dx = 2 / size(mesh, 1) + # Get cell length in reference mesh + dx = 2 / size(mesh, 1) - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 - for i in eachnode(basis) - # node_coordinates are the mapped reference node_coordinates - node_coordinates[1, i, cell_x] = mapping(cell_x_offset + dx/2 * nodes[i])[1] - end + for i in eachnode(basis) + # node_coordinates are the mapped reference node_coordinates + node_coordinates[1, i, cell_x] = mapping(cell_x_offset + dx / 2 * nodes[i])[1] + end end - # Calculate Jacobian matrix of the mapping from the reference element to the element in the physical domain -function calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates::AbstractArray{<:Any,3}, +function calc_jacobian_matrix!(jacobian_matrix, element, + node_coordinates::AbstractArray{<:Any, 3}, basis::LobattoLegendreBasis) - @views mul!(jacobian_matrix[1, 1, :, element], basis.derivative_matrix, node_coordinates[1, :, element]) # x_ξ + @views mul!(jacobian_matrix[1, 1, :, element], basis.derivative_matrix, + node_coordinates[1, :, element]) # x_ξ - return jacobian_matrix + return jacobian_matrix end - # Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node -function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 2}, element, jacobian_matrix) - @views inverse_jacobian[:, element] .= inv.(jacobian_matrix[1, 1, :, element]) +function 
calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 2}, element, + jacobian_matrix) + @views inverse_jacobian[:, element] .= inv.(jacobian_matrix[1, 1, :, element]) - return inverse_jacobian + return inverse_jacobian end - # Save id of left neighbor of every element function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{1}) - # Neighbors in x-direction - # Inner elements - for cell_x in 2:size(mesh, 1) - left_neighbors[1, cell_x] = cell_x - 1 - end - - if isperiodic(mesh) - # Periodic boundary - left_neighbors[1, 1] = size(mesh, 1) - else - # Use boundary conditions - left_neighbors[1, 1] = 0 - end - - return left_neighbors + # Neighbors in x-direction + # Inner elements + for cell_x in 2:size(mesh, 1) + left_neighbors[1, cell_x] = cell_x - 1 + end + + if isperiodic(mesh) + # Periodic boundary + left_neighbors[1, 1] = size(mesh, 1) + else + # Use boundary conditions + left_neighbors[1, 1] = 0 + end + + return left_neighbors end - end # @muladd diff --git a/src/solvers/dgsem_structured/containers_2d.jl b/src/solvers/dgsem_structured/containers_2d.jl index e2b5aff8b0b..fb6db48e0a5 100644 --- a/src/solvers/dgsem_structured/containers_2d.jl +++ b/src/solvers/dgsem_structured/containers_2d.jl @@ -3,174 +3,187 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::StructuredMesh{2}, basis::LobattoLegendreBasis) - @unpack node_coordinates, left_neighbors, - jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, left_neighbors, + jacobian_matrix, contravariant_vectors, inverse_jacobian = elements - linear_indices = LinearIndices(size(mesh)) + linear_indices = LinearIndices(size(mesh)) - # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant - for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) - element = linear_indices[cell_x, cell_y] + # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant + for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) + element = linear_indices[cell_x, cell_y] - calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, mesh.mapping, mesh, basis) + calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, mesh.mapping, + mesh, basis) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) + end - initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) + initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) - return nothing + return nothing end - # Calculate physical coordinates to which every node of the reference element is mapped # `mesh.mapping` is passed as an additional argument for type stability (function barrier) function calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, mapping, mesh::StructuredMesh{2}, basis::LobattoLegendreBasis) - @unpack nodes = basis + @unpack nodes = basis - # Get cell length in reference mesh - dx = 2 / size(mesh, 1) - dy = 2 / size(mesh, 2) + # Get cell length in 
reference mesh + dx = 2 / size(mesh, 1) + dy = 2 / size(mesh, 2) - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 - for j in eachnode(basis), i in eachnode(basis) - # node_coordinates are the mapped reference node_coordinates - node_coordinates[:, i, j, element] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j]) - end + for j in eachnode(basis), i in eachnode(basis) + # node_coordinates are the mapped reference node_coordinates + node_coordinates[:, i, j, element] .= mapping(cell_x_offset + dx / 2 * nodes[i], + cell_y_offset + dy / 2 * nodes[j]) + end end - # Calculate Jacobian matrix of the mapping from the reference element to the element in the physical domain -function calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates::AbstractArray{<:Any, 4}, basis::LobattoLegendreBasis) - @unpack derivative_matrix = basis - - # The code below is equivalent to the following matrix multiplications, which - # seem to end up calling generic linear algebra code from Julia. Thus, the - # optimized code below using `@turbo` is much faster. - # jacobian_matrix[1, 1, :, :, element] = derivative_matrix * node_coordinates[1, :, :, element] # x_ξ - # jacobian_matrix[2, 1, :, :, element] = derivative_matrix * node_coordinates[2, :, :, element] # y_ξ - # jacobian_matrix[1, 2, :, :, element] = node_coordinates[1, :, :, element] * derivative_matrix' # x_η - # jacobian_matrix[2, 2, :, :, element] = node_coordinates[2, :, :, element] * derivative_matrix' # y_η - - # x_ξ, y_ξ - @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1)) - for j in indices((jacobian_matrix, node_coordinates), (4, 3)), i in indices((jacobian_matrix, derivative_matrix), (3, 1)) - result = zero(eltype(jacobian_matrix)) - for ii in indices((node_coordinates, derivative_matrix), (2, 2)) - result += derivative_matrix[i, ii] * node_coordinates[xy, ii, j, element] - end - jacobian_matrix[xy, 1, i, j, element] = result +function calc_jacobian_matrix!(jacobian_matrix, element, + node_coordinates::AbstractArray{<:Any, 4}, + basis::LobattoLegendreBasis) + @unpack derivative_matrix = basis + + # The code below is equivalent to the following matrix multiplications, which + # seem to end up calling generic linear algebra code from Julia. Thus, the + # optimized code below using `@turbo` is much faster. 
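
# As a runnable counterpart to the matrix-multiplication form mentioned above
# (and restated line by line just below), a minimal standalone sketch; the
# helper name `jacobian_via_mul!` is hypothetical and the array layouts are
# the ones documented in this file:
using LinearAlgebra: mul!

function jacobian_via_mul!(jacobian_matrix, node_coordinates, derivative_matrix,
                           element)
    @views for dim in 1:2
        # x_ξ / y_ξ: apply the derivative matrix along the first node index
        mul!(jacobian_matrix[dim, 1, :, :, element], derivative_matrix,
             node_coordinates[dim, :, :, element])
        # x_η / y_η: apply it along the second node index
        mul!(jacobian_matrix[dim, 2, :, :, element],
             node_coordinates[dim, :, :, element], derivative_matrix')
    end
    return jacobian_matrix
end
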
+    # jacobian_matrix[1, 1, :, :, element] = derivative_matrix * node_coordinates[1, :, :, element] # x_ξ
+    # jacobian_matrix[2, 1, :, :, element] = derivative_matrix * node_coordinates[2, :, :, element] # y_ξ
+    # jacobian_matrix[1, 2, :, :, element] = node_coordinates[1, :, :, element] * derivative_matrix' # x_η
+    # jacobian_matrix[2, 2, :, :, element] = node_coordinates[2, :, :, element] * derivative_matrix' # y_η
+
+    # x_ξ, y_ξ
+    @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1))
+        for j in indices((jacobian_matrix, node_coordinates), (4, 3)),
+            i in indices((jacobian_matrix, derivative_matrix), (3, 1))
+
+            result = zero(eltype(jacobian_matrix))
+            for ii in indices((node_coordinates, derivative_matrix), (2, 2))
+                result += derivative_matrix[i, ii] *
+                          node_coordinates[xy, ii, j, element]
+            end
+            jacobian_matrix[xy, 1, i, j, element] = result
+        end
     end
-  end
-
-  # x_η, y_η
-  @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1))
-    for j in indices((jacobian_matrix, derivative_matrix), (4, 1)), i in indices((jacobian_matrix, node_coordinates), (3, 2))
-      result = zero(eltype(jacobian_matrix))
-      for jj in indices((node_coordinates, derivative_matrix), (3, 2))
-        result += derivative_matrix[j, jj] * node_coordinates[xy, i, jj, element]
-      end
-      jacobian_matrix[xy, 2, i, j, element] = result
+
+    # x_η, y_η
+    @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1))
+        for j in indices((jacobian_matrix, derivative_matrix), (4, 1)),
+            i in indices((jacobian_matrix, node_coordinates), (3, 2))
+
+            result = zero(eltype(jacobian_matrix))
+            for jj in indices((node_coordinates, derivative_matrix), (3, 2))
+                result += derivative_matrix[j, jj] *
+                          node_coordinates[xy, i, jj, element]
+            end
+            jacobian_matrix[xy, 2, i, j, element] = result
+        end
     end
-  end

-  return jacobian_matrix
+    return jacobian_matrix
end
-
# Calculate contravariant vectors, multiplied by the Jacobian determinant J of the transformation mapping.
# Those are called Ja^i in Kopriva's blue book.
-function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any,5}, element, jacobian_matrix)
-  # The code below is equivalent to the following using broadcasting but much faster.
+function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, 5},
+                                     element, jacobian_matrix)
+    # The code below is equivalent to the following using broadcasting but much faster.
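
# In 2D, the broadcasting equivalences restated just below amount to sign
# swaps of Jacobian entries. A standalone consistency check with made-up
# scalar values (x_ξ etc. are hypothetical, not variables from this file):
x_ξ, x_η = 1.0, 0.2
y_ξ, y_η = 0.1, 1.5
Ja1 = (y_η, -x_η)    # first contravariant vector Ja^1, scaled by J
Ja2 = (-y_ξ, x_ξ)    # second contravariant vector Ja^2, scaled by J
J = x_ξ * y_η - x_η * y_ξ
@assert Ja1[1] * x_ξ + Ja1[2] * y_ξ ≈ J   # Ja^1 ⋅ X_ξ recovers the determinant
@assert Ja2[1] * x_η + Ja2[2] * y_η ≈ J   # Ja^2 ⋅ X_η does as well
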
+ # # First contravariant vector Ja^1 + # contravariant_vectors[1, 1, :, :, element] = jacobian_matrix[2, 2, :, :, element] + # contravariant_vectors[2, 1, :, :, element] = -jacobian_matrix[1, 2, :, :, element] + # # Second contravariant vector Ja^2 + # contravariant_vectors[1, 2, :, :, element] = -jacobian_matrix[2, 1, :, :, element] + # contravariant_vectors[2, 2, :, :, element] = jacobian_matrix[1, 1, :, :, element] + + @turbo for j in indices((contravariant_vectors, jacobian_matrix), (4, 4)), + i in indices((contravariant_vectors, jacobian_matrix), (3, 3)) + # First contravariant vector Ja^1 + contravariant_vectors[1, 1, i, j, element] = jacobian_matrix[2, 2, i, j, + element] + contravariant_vectors[2, 1, i, j, element] = -jacobian_matrix[1, 2, i, j, + element] + + # Second contravariant vector Ja^2 + contravariant_vectors[1, 2, i, j, element] = -jacobian_matrix[2, 1, i, j, + element] + contravariant_vectors[2, 2, i, j, element] = jacobian_matrix[1, 1, i, j, + element] + end + return contravariant_vectors +end # Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node -function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any,3}, element, jacobian_matrix) - # The code below is equivalent to the following high-level code but much faster. - # inverse_jacobian[i, j, element] = inv(det(jacobian_matrix[:, :, i, j, element]) - - @turbo for j in indices((inverse_jacobian, jacobian_matrix), (2, 4)), - i in indices((inverse_jacobian, jacobian_matrix), (1, 3)) - inverse_jacobian[i, j, element] = inv(jacobian_matrix[1, 1, i, j, element] * jacobian_matrix[2, 2, i, j, element] - - jacobian_matrix[1, 2, i, j, element] * jacobian_matrix[2, 1, i, j, element]) - end +function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 3}, element, + jacobian_matrix) + # The code below is equivalent to the following high-level code but much faster. 
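
# A standalone check that the hand-rolled 2x2 determinant used below matches
# the high-level form mentioned above (made-up Jacobian entries):
using LinearAlgebra: det

Jmat = [1.0 0.2; 0.1 1.5]
inv_jac_manual = inv(Jmat[1, 1] * Jmat[2, 2] - Jmat[1, 2] * Jmat[2, 1])
@assert inv_jac_manual ≈ inv(det(Jmat))
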
+    # inverse_jacobian[i, j, element] = inv(det(jacobian_matrix[:, :, i, j, element]))
+
+    @turbo for j in indices((inverse_jacobian, jacobian_matrix), (2, 4)),
+               i in indices((inverse_jacobian, jacobian_matrix), (1, 3))
+
+        inverse_jacobian[i, j, element] = inv(jacobian_matrix[1, 1, i, j, element] *
+                                              jacobian_matrix[2, 2, i, j, element] -
+                                              jacobian_matrix[1, 2, i, j, element] *
+                                              jacobian_matrix[2, 1, i, j, element])
+    end
-  return inverse_jacobian
+    return inverse_jacobian
end
-
# Save id of left neighbor of every element
-function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{2}, linear_indices)
-  # Neighbors in x-direction
-  for cell_y in 1:size(mesh, 2)
-    # Inner elements
-    for cell_x in 2:size(mesh, 1)
-      element = linear_indices[cell_x, cell_y]
-      left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y]
+function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{2},
+                                                linear_indices)
+    # Neighbors in x-direction
+    for cell_y in 1:size(mesh, 2)
+        # Inner elements
+        for cell_x in 2:size(mesh, 1)
+            element = linear_indices[cell_x, cell_y]
+            left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y]
+        end
+
+        if isperiodic(mesh, 1)
+            # Periodic boundary
+            left_neighbors[1, linear_indices[1, cell_y]] = linear_indices[end, cell_y]
+        else
+            # Use boundary conditions
+            left_neighbors[1, linear_indices[1, cell_y]] = 0
+        end
    end
-    if isperiodic(mesh, 1)
-      # Periodic boundary
-      left_neighbors[1, linear_indices[1, cell_y]] = linear_indices[end, cell_y]
-    else
-      # Use boundary conditions
-      left_neighbors[1, linear_indices[1, cell_y]] = 0
+    # Neighbors in y-direction
+    for cell_x in 1:size(mesh, 1)
+        # Inner elements
+        for cell_y in 2:size(mesh, 2)
+            element = linear_indices[cell_x, cell_y]
+            left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1]
+        end
+
+        if isperiodic(mesh, 2)
+            # Periodic boundary
+            left_neighbors[2, linear_indices[cell_x, 1]] = linear_indices[cell_x, end]
+        else
+            # Use boundary conditions
+            left_neighbors[2, linear_indices[cell_x, 1]] = 0
+        end
    end
-  end
-
-  # Neighbors in y-direction
-  for cell_x in 1:size(mesh, 1)
-    # Inner elements
-    for cell_y in 2:size(mesh, 2)
-      element = linear_indices[cell_x, cell_y]
-      left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1]
-    end
-
-    if isperiodic(mesh, 2)
-      # Periodic boundary
-      left_neighbors[2, linear_indices[cell_x, 1]] = linear_indices[cell_x, end]
-    else
-      # Use boundary conditions
-      left_neighbors[2, linear_indices[cell_x, 1]] = 0
-    end
-  end
-  return left_neighbors
+    return left_neighbors
end
-
end # @muladd
diff --git a/src/solvers/dgsem_structured/containers_3d.jl b/src/solvers/dgsem_structured/containers_3d.jl
index 1dc1ced4528..e843e869bf5 100644
--- a/src/solvers/dgsem_structured/containers_3d.jl
+++ b/src/solvers/dgsem_structured/containers_3d.jl
@@ -3,288 +3,342 @@
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
-
+#!
format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::StructuredMesh{3}, basis::LobattoLegendreBasis) - @unpack node_coordinates, left_neighbors, - jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, left_neighbors, + jacobian_matrix, contravariant_vectors, inverse_jacobian = elements - linear_indices = LinearIndices(size(mesh)) + linear_indices = LinearIndices(size(mesh)) - # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant - for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) - element = linear_indices[cell_x, cell_y, cell_z] + # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant + for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) + element = linear_indices[cell_x, cell_y, cell_z] - calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, cell_z, mesh.mapping, mesh, basis) + calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, cell_z, + mesh.mapping, mesh, basis) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, node_coordinates, basis) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, + node_coordinates, basis) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) + end - initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) + initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) - return nothing + return nothing end - # Calculate physical coordinates to which every node of the reference element is mapped # `mesh.mapping` is passed as an additional argument for type stability (function barrier) function calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, cell_z, mapping, mesh::StructuredMesh{3}, basis::LobattoLegendreBasis) - @unpack nodes = basis - - # Get cell length in reference mesh - dx = 2 / size(mesh, 1) - dy = 2 / size(mesh, 2) - dz = 2 / size(mesh, 3) - - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 - cell_z_offset = -1 + (cell_z-1) * dz + dz/2 - - for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - # node_coordinates are the mapped reference node_coordinates - node_coordinates[:, i, j, k, element] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j], - cell_z_offset + dz/2 * nodes[k]) - end + @unpack nodes = basis + + # Get cell length in reference mesh + dx = 2 / size(mesh, 1) + dy = 2 / size(mesh, 2) + dz = 2 / size(mesh, 3) + + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 + cell_z_offset = -1 + (cell_z - 1) * dz + dz / 2 + + for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + # node_coordinates are the mapped reference node_coordinates + node_coordinates[:, i, j, k, element] .= mapping(cell_x_offset + + dx / 2 * nodes[i], + cell_y_offset + + dy / 2 * nodes[j], + cell_z_offset + + dz / 2 * nodes[k]) + end end - # Calculate Jacobian matrix of the mapping from the reference element to the element in the physical domain 
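
# For reference, the ∂/∂ξ part of the function that follows can be written as
# one matrix-vector product per grid line. A minimal standalone sketch; the
# helper name `jacobian_xi_reference!` is hypothetical, with the array layouts
# documented in this file:
using LinearAlgebra: mul!

function jacobian_xi_reference!(jacobian_matrix, node_coordinates,
                                derivative_matrix, element)
    n = size(derivative_matrix, 1)
    @views for dim in 1:3, k in 1:n, j in 1:n
        mul!(jacobian_matrix[dim, 1, :, j, k, element], derivative_matrix,
             node_coordinates[dim, :, j, k, element])
    end
    return jacobian_matrix
end
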
-function calc_jacobian_matrix!(jacobian_matrix::AbstractArray{<:Any,6}, element, node_coordinates, basis) - # The code below is equivalent to the following matrix multiplications but much faster. - # - # for dim in 1:3, j in eachnode(basis), i in eachnode(basis) - # # ∂/∂ξ - # jacobian_matrix[dim, 1, :, i, j, element] = basis.derivative_matrix * node_coordinates[dim, :, i, j, element] - # # ∂/∂η - # jacobian_matrix[dim, 2, i, :, j, element] = basis.derivative_matrix * node_coordinates[dim, i, :, j, element] - # # ∂/∂ζ - # jacobian_matrix[dim, 3, i, j, :, element] = basis.derivative_matrix * node_coordinates[dim, i, j, :, element] - # end - - @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(jacobian_matrix)) - - for ii in eachnode(basis) - result += basis.derivative_matrix[i, ii] * node_coordinates[dim, ii, j, k, element] +function calc_jacobian_matrix!(jacobian_matrix::AbstractArray{<:Any, 6}, element, + node_coordinates, basis) + # The code below is equivalent to the following matrix multiplications but much faster. + # + # for dim in 1:3, j in eachnode(basis), i in eachnode(basis) + # # ∂/∂ξ + # jacobian_matrix[dim, 1, :, i, j, element] = basis.derivative_matrix * node_coordinates[dim, :, i, j, element] + # # ∂/∂η + # jacobian_matrix[dim, 2, i, :, j, element] = basis.derivative_matrix * node_coordinates[dim, i, :, j, element] + # # ∂/∂ζ + # jacobian_matrix[dim, 3, i, j, :, element] = basis.derivative_matrix * node_coordinates[dim, i, j, :, element] + # end + + @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), + i in eachnode(basis) + + result = zero(eltype(jacobian_matrix)) + + for ii in eachnode(basis) + result += basis.derivative_matrix[i, ii] * + node_coordinates[dim, ii, j, k, element] + end + + jacobian_matrix[dim, 1, i, j, k, element] = result end - jacobian_matrix[dim, 1, i, j, k, element] = result - end + @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), + i in eachnode(basis) + + result = zero(eltype(jacobian_matrix)) - @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(jacobian_matrix)) + for ii in eachnode(basis) + result += basis.derivative_matrix[j, ii] * + node_coordinates[dim, i, ii, k, element] + end - for ii in eachnode(basis) - result += basis.derivative_matrix[j, ii] * node_coordinates[dim, i, ii, k, element] + jacobian_matrix[dim, 2, i, j, k, element] = result end - jacobian_matrix[dim, 2, i, j, k, element] = result - end + @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), + i in eachnode(basis) - @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(jacobian_matrix)) + result = zero(eltype(jacobian_matrix)) - for ii in eachnode(basis) - result += basis.derivative_matrix[k, ii] * node_coordinates[dim, i, j, ii, element] - end + for ii in eachnode(basis) + result += basis.derivative_matrix[k, ii] * + node_coordinates[dim, i, j, ii, element] + end - jacobian_matrix[dim, 3, i, j, k, element] = result - end + jacobian_matrix[dim, 3, i, j, k, element] = result + end - return jacobian_matrix + return jacobian_matrix end - # Calculate contravariant vectors, multiplied by the Jacobian determinant J of the transformation mapping, # using the invariant curl form. # These are called Ja^i in Kopriva's blue book. 
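
# The (n, m, l) cyclic index pattern used in the function below, checked in
# isolation; the curl form is used because it satisfies the discrete metric
# identities exactly, which is what gives free-stream preservation (see
# Kopriva's book referenced above):
for n in 1:3
    m = (n % 3) + 1
    l = ((n + 1) % 3) + 1
    # yields (1, 2, 3), (2, 3, 1), (3, 1, 2), i.e. cyclic permutations
    @assert sort([n, m, l]) == [1, 2, 3]
end
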
-function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any,6}, element, - jacobian_matrix, node_coordinates, basis::LobattoLegendreBasis) - @unpack derivative_matrix = basis - - # The general form is - # Jaⁱₙ = 0.5 * ( ∇ × (Xₘ ∇ Xₗ - Xₗ ∇ Xₘ) )ᵢ where (n, m, l) cyclic and ∇ = (∂/∂ξ, ∂/∂η, ∂/∂ζ)ᵀ - - for n in 1:3 - # (n, m, l) cyclic - m = (n % 3) + 1 - l = ((n + 1) % 3) + 1 - - # Calculate Ja¹ₙ = 0.5 * [ (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η - (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ ] - # For each of these, the first and second summand are computed in separate loops - # for performance reasons. - - # First summand 0.5 * (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to j-dimension to differentiate wrt η - result += 0.5 * derivative_matrix[j, ii] * ( - node_coordinates[m, i, ii, k, element] * jacobian_matrix[l, 3, i, ii, k, element] - - node_coordinates[l, i, ii, k, element] * jacobian_matrix[m, 3, i, ii, k, element]) - end - - contravariant_vectors[n, 1, i, j, k, element] = result +function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, 6}, + element, + jacobian_matrix, node_coordinates, + basis::LobattoLegendreBasis) + @unpack derivative_matrix = basis + + # The general form is + # Jaⁱₙ = 0.5 * ( ∇ × (Xₘ ∇ Xₗ - Xₗ ∇ Xₘ) )ᵢ where (n, m, l) cyclic and ∇ = (∂/∂ξ, ∂/∂η, ∂/∂ζ)ᵀ + + for n in 1:3 + # (n, m, l) cyclic + m = (n % 3) + 1 + l = ((n + 1) % 3) + 1 + + # Calculate Ja¹ₙ = 0.5 * [ (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η - (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ ] + # For each of these, the first and second summand are computed in separate loops + # for performance reasons. + + # First summand 0.5 * (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to j-dimension to differentiate wrt η + result += 0.5 * derivative_matrix[j, ii] * + (node_coordinates[m, i, ii, k, element] * + jacobian_matrix[l, 3, i, ii, k, element] - + node_coordinates[l, i, ii, k, element] * + jacobian_matrix[m, 3, i, ii, k, element]) + end + + contravariant_vectors[n, 1, i, j, k, element] = result + end + + # Second summand -0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to k-dimension to differentiate wrt ζ + result += 0.5 * derivative_matrix[k, ii] * + (node_coordinates[m, i, j, ii, element] * + jacobian_matrix[l, 2, i, j, ii, element] - + node_coordinates[l, i, j, ii, element] * + jacobian_matrix[m, 2, i, j, ii, element]) + end + + contravariant_vectors[n, 1, i, j, k, element] -= result + end + + # Calculate Ja²ₙ = 0.5 * [ (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ - (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_ξ ] + + # First summand 0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to k-dimension to differentiate wrt ζ + result += 0.5 * derivative_matrix[k, ii] * + (node_coordinates[m, i, j, ii, element] * + jacobian_matrix[l, 1, i, j, ii, element] - + node_coordinates[l, i, j, ii, element] * + jacobian_matrix[m, 1, i, j, ii, element]) + end + + contravariant_vectors[n, 2, i, j, k, element] = result + end + + # Second summand -0.5 * (Xₘ Xₗ_ζ - 
Xₗ Xₘ_ζ)_ξ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to i-dimension to differentiate wrt ξ + result += 0.5 * derivative_matrix[i, ii] * + (node_coordinates[m, ii, j, k, element] * + jacobian_matrix[l, 3, ii, j, k, element] - + node_coordinates[l, ii, j, k, element] * + jacobian_matrix[m, 3, ii, j, k, element]) + end + + contravariant_vectors[n, 2, i, j, k, element] -= result + end + + # Calculate Ja³ₙ = 0.5 * [ (Xₘ Xₗ_η - Xₗ Xₘ_η)_ξ - (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η ] + + # First summand 0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ξ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to i-dimension to differentiate wrt ξ + result += 0.5 * derivative_matrix[i, ii] * + (node_coordinates[m, ii, j, k, element] * + jacobian_matrix[l, 2, ii, j, k, element] - + node_coordinates[l, ii, j, k, element] * + jacobian_matrix[m, 2, ii, j, k, element]) + end + + contravariant_vectors[n, 3, i, j, k, element] = result + end + + # Second summand -0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to j-dimension to differentiate wrt η + result += 0.5 * derivative_matrix[j, ii] * + (node_coordinates[m, i, ii, k, element] * + jacobian_matrix[l, 1, i, ii, k, element] - + node_coordinates[l, i, ii, k, element] * + jacobian_matrix[m, 1, i, ii, k, element]) + end + + contravariant_vectors[n, 3, i, j, k, element] -= result + end end - # Second summand -0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to k-dimension to differentiate wrt ζ - result += 0.5 * derivative_matrix[k, ii] * ( - node_coordinates[m, i, j, ii, element] * jacobian_matrix[l, 2, i, j, ii, element] - - node_coordinates[l, i, j, ii, element] * jacobian_matrix[m, 2, i, j, ii, element]) - end - - contravariant_vectors[n, 1, i, j, k, element] -= result - end - - # Calculate Ja²ₙ = 0.5 * [ (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ - (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_ξ ] - - # First summand 0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to k-dimension to differentiate wrt ζ - result += 0.5 * derivative_matrix[k, ii] * ( - node_coordinates[m, i, j, ii, element] * jacobian_matrix[l, 1, i, j, ii, element] - - node_coordinates[l, i, j, ii, element] * jacobian_matrix[m, 1, i, j, ii, element]) - end - - contravariant_vectors[n, 2, i, j, k, element] = result - end - - # Second summand -0.5 * (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_ξ - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to i-dimension to differentiate wrt ξ - result += 0.5 * derivative_matrix[i, ii] * ( - node_coordinates[m, ii, j, k, element] * jacobian_matrix[l, 3, ii, j, k, element] - - node_coordinates[l, ii, j, k, element] * jacobian_matrix[m, 3, ii, j, k, element]) - end - - contravariant_vectors[n, 2, i, j, k, element] -= result - end - - # Calculate Ja³ₙ = 0.5 * [ (Xₘ Xₗ_η - Xₗ 
Xₘ_η)_ξ - (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η ] + return contravariant_vectors +end - # First summand 0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ξ +# Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node +function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 4}, element, + jacobian_matrix, basis) @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to i-dimension to differentiate wrt ξ - result += 0.5 * derivative_matrix[i, ii] * ( - node_coordinates[m, ii, j, k, element] * jacobian_matrix[l, 2, ii, j, k, element] - - node_coordinates[l, ii, j, k, element] * jacobian_matrix[m, 2, ii, j, k, element]) - end - - contravariant_vectors[n, 3, i, j, k, element] = result + # Calculate Determinant by using Sarrus formula (about 100 times faster than LinearAlgebra.det()) + inverse_jacobian[i, j, k, element] = inv(jacobian_matrix[1, 1, i, j, k, + element] * + jacobian_matrix[2, 2, i, j, k, + element] * + jacobian_matrix[3, 3, i, j, k, element] + + jacobian_matrix[1, 2, i, j, k, + element] * + jacobian_matrix[2, 3, i, j, k, + element] * + jacobian_matrix[3, 1, i, j, k, element] + + jacobian_matrix[1, 3, i, j, k, + element] * + jacobian_matrix[2, 1, i, j, k, + element] * + jacobian_matrix[3, 2, i, j, k, element] - + jacobian_matrix[3, 1, i, j, k, + element] * + jacobian_matrix[2, 2, i, j, k, + element] * + jacobian_matrix[1, 3, i, j, k, element] - + jacobian_matrix[3, 2, i, j, k, + element] * + jacobian_matrix[2, 3, i, j, k, + element] * + jacobian_matrix[1, 1, i, j, k, element] - + jacobian_matrix[3, 3, i, j, k, + element] * + jacobian_matrix[2, 1, i, j, k, + element] * + jacobian_matrix[1, 2, i, j, k, element]) end - # Second summand -0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to j-dimension to differentiate wrt η - result += 0.5 * derivative_matrix[j, ii] * ( - node_coordinates[m, i, ii, k, element] * jacobian_matrix[l, 1, i, ii, k, element] - - node_coordinates[l, i, ii, k, element] * jacobian_matrix[m, 1, i, ii, k, element]) - end - - contravariant_vectors[n, 3, i, j, k, element] -= result - end - end - - return contravariant_vectors -end - - -# Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node -function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 4}, element, jacobian_matrix, basis) - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - # Calculate Determinant by using Sarrus formula (about 100 times faster than LinearAlgebra.det()) - inverse_jacobian[i, j, k, element] = inv( - jacobian_matrix[1, 1, i, j, k, element] * jacobian_matrix[2, 2, i, j, k, element] * jacobian_matrix[3, 3, i, j, k, element] + - jacobian_matrix[1, 2, i, j, k, element] * jacobian_matrix[2, 3, i, j, k, element] * jacobian_matrix[3, 1, i, j, k, element] + - jacobian_matrix[1, 3, i, j, k, element] * jacobian_matrix[2, 1, i, j, k, element] * jacobian_matrix[3, 2, i, j, k, element] - - jacobian_matrix[3, 1, i, j, k, element] * jacobian_matrix[2, 2, i, j, k, element] * jacobian_matrix[1, 3, i, j, k, element] - - jacobian_matrix[3, 2, i, j, k, element] * jacobian_matrix[2, 3, i, j, k, element] * jacobian_matrix[1, 1, i, j, k, element] - - jacobian_matrix[3, 3, i, j, k, element] * jacobian_matrix[2, 1, i, j, k, element] * 
jacobian_matrix[1, 2, i, j, k, element] ) - end - - return inverse_jacobian + return inverse_jacobian end - # Save id of left neighbor of every element -function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{3}, linear_indices) - # Neighbors in x-direction - for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2) - # Inner elements - for cell_x in 2:size(mesh, 1) - element = linear_indices[cell_x, cell_y, cell_z] - left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y, cell_z] - end - - if isperiodic(mesh, 1) - # Periodic boundary - left_neighbors[1, linear_indices[1, cell_y, cell_z]] = linear_indices[end, cell_y, cell_z] - else - left_neighbors[1, linear_indices[1, cell_y, cell_z]] = 0 - end - end - - # Neighbors in y-direction - for cell_z in 1:size(mesh, 3), cell_x in 1:size(mesh, 1) - # Inner elements - for cell_y in 2:size(mesh, 2) - element = linear_indices[cell_x, cell_y, cell_z] - left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1, cell_z] +function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{3}, + linear_indices) + # Neighbors in x-direction + for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2) + # Inner elements + for cell_x in 2:size(mesh, 1) + element = linear_indices[cell_x, cell_y, cell_z] + left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y, cell_z] + end + + if isperiodic(mesh, 1) + # Periodic boundary + left_neighbors[1, linear_indices[1, cell_y, cell_z]] = linear_indices[end, + cell_y, + cell_z] + else + left_neighbors[1, linear_indices[1, cell_y, cell_z]] = 0 + end end - if isperiodic(mesh, 2) - # Periodic boundary - left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = linear_indices[cell_x, end, cell_z] - else - left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = 0 - end - end - - # Neighbors in z-direction - for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) - # Inner elements - for cell_z in 2:size(mesh, 3) - element = linear_indices[cell_x, cell_y, cell_z] - left_neighbors[3, element] = linear_indices[cell_x, cell_y, cell_z - 1] + # Neighbors in y-direction + for cell_z in 1:size(mesh, 3), cell_x in 1:size(mesh, 1) + # Inner elements + for cell_y in 2:size(mesh, 2) + element = linear_indices[cell_x, cell_y, cell_z] + left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1, cell_z] + end + + if isperiodic(mesh, 2) + # Periodic boundary + left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = linear_indices[cell_x, + end, + cell_z] + else + left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = 0 + end end - if isperiodic(mesh, 3) - # Periodic boundary - left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = linear_indices[cell_x, cell_y, end] - else - left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = 0 + # Neighbors in z-direction + for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) + # Inner elements + for cell_z in 2:size(mesh, 3) + element = linear_indices[cell_x, cell_y, cell_z] + left_neighbors[3, element] = linear_indices[cell_x, cell_y, cell_z - 1] + end + + if isperiodic(mesh, 3) + # Periodic boundary + left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = linear_indices[cell_x, + cell_y, + end] + else + left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = 0 + end end - end - return left_neighbors + return left_neighbors end - end # @muladd diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index c4ba534b496..5cf4c4ef78c 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ 
b/src/solvers/dgsem_structured/dg.jl @@ -3,68 +3,74 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache(mesh::StructuredMesh, equations::AbstractEquations, dg::DG, ::Any, ::Type{uEltype}) where {uEltype<:Real} - elements = init_elements(mesh, equations, dg.basis, uEltype) +function create_cache(mesh::StructuredMesh, equations::AbstractEquations, dg::DG, ::Any, + ::Type{uEltype}) where {uEltype <: Real} + elements = init_elements(mesh, equations, dg.basis, uEltype) - cache = (; elements) + cache = (; elements) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - return cache + return cache end # Extract contravariant vector Ja^i (i = index) as SVector @inline function get_contravariant_vector(index, contravariant_vectors, indices...) - SVector(ntuple(@inline(dim -> contravariant_vectors[dim, index, indices...]), Val(ndims(contravariant_vectors) - 3))) + SVector(ntuple(@inline(dim->contravariant_vectors[dim, index, indices...]), + Val(ndims(contravariant_vectors) - 3))) end - -@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, orientation, +@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, + orientation, boundary_condition::BoundaryConditionPeriodic, mesh::StructuredMesh, equations, surface_integral, dg::DG, cache, - direction, node_indices, surface_node_indices, element) - @assert isperiodic(mesh, orientation) + direction, node_indices, + surface_node_indices, element) + @assert isperiodic(mesh, orientation) end - -@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, orientation, +@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, + orientation, boundary_condition, mesh::StructuredMesh, equations, surface_integral, dg::DG, cache, - direction, node_indices, surface_node_indices, element) - @unpack node_coordinates, contravariant_vectors, inverse_jacobian = cache.elements - @unpack surface_flux = surface_integral - - u_inner = get_node_vars(u, equations, dg, node_indices..., element) - x = get_node_coords(node_coordinates, equations, dg, node_indices..., element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[node_indices..., element]) - - # Contravariant vector Ja^i is the normal vector - normal = sign_jacobian * get_contravariant_vector(orientation, contravariant_vectors, - node_indices..., element) - - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. 
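
# The sign convention explained above, made concrete (standalone sketch with
# made-up values):
inverse_jacobian_node = -2.0                  # negative: orientation-reversing mapping
sign_jacobian = sign(inverse_jacobian_node)   # == -1.0
normal = sign_jacobian .* (0.0, 1.0)          # normal re-oriented from left to right element
# The flux computed with this `normal` is multiplied by `sign_jacobian` once
# more below, so the stored value is the physical flux in normal direction.
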
- flux = sign_jacobian * boundary_condition(u_inner, normal, direction, x, t, surface_flux, equations) - - for v in eachvariable(equations) - surface_flux_values[v, surface_node_indices..., direction, element] = flux[v] - end + direction, node_indices, + surface_node_indices, element) + @unpack node_coordinates, contravariant_vectors, inverse_jacobian = cache.elements + @unpack surface_flux = surface_integral + + u_inner = get_node_vars(u, equations, dg, node_indices..., element) + x = get_node_coords(node_coordinates, equations, dg, node_indices..., element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[node_indices..., element]) + + # Contravariant vector Ja^i is the normal vector + normal = sign_jacobian * + get_contravariant_vector(orientation, contravariant_vectors, + node_indices..., element) + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * + boundary_condition(u_inner, normal, direction, x, t, surface_flux, equations) + + for v in eachvariable(equations) + surface_flux_values[v, surface_node_indices..., direction, element] = flux[v] + end end - include("containers.jl") include("dg_1d.jl") include("dg_2d.jl") @@ -77,6 +83,4 @@ include("indicators_3d.jl") # Specialized implementations used to improve performance include("dg_2d_compressible_euler.jl") include("dg_3d_compressible_euler.jl") - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index e33328a8204..3d63cc5af36 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ b/src/solvers/dgsem_structured/dg_1d.jl @@ -3,107 +3,114 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function rhs!(du, u, t, mesh::StructuredMesh{1}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - # Calculate interface and boundary fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, u, mesh, equations, dg.surface_integral, dg) + # Calculate interface and boundary fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, u, mesh, equations, dg.surface_integral, dg) + end - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, u, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end - return nothing + return nothing end - function calc_interface_flux!(cache, u, mesh::StructuredMesh{1}, equations, surface_integral, dg::DG) - @unpack surface_flux = surface_integral + @unpack surface_flux = surface_integral - @threaded for element in eachelement(dg, cache) - left_element = cache.elements.left_neighbors[1, element] + @threaded for element in eachelement(dg, cache) + left_element = cache.elements.left_neighbors[1, element] - if left_element > 0 # left_element = 0 at boundaries - u_ll = get_node_vars(u, equations, dg, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, 1, element) + if left_element > 0 # left_element = 0 at boundaries + u_ll = get_node_vars(u, equations, dg, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, 1, element) - f1 = surface_flux(u_ll, u_rr, 1, equations) + f1 = surface_flux(u_ll, u_rr, 1, equations) - for v in eachvariable(equations) - cache.elements.surface_flux_values[v, 2, left_element] = f1[v] - cache.elements.surface_flux_values[v, 1, element] = f1[v] - end + for v in eachvariable(equations) + cache.elements.surface_flux_values[v, 2, left_element] = f1[v] + cache.elements.surface_flux_values[v, 1, element] = f1[v] + end + end end - end - return nothing + return nothing end - 
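
# What the interface loop above sees for a 1D mesh with four cells, as
# initialize_left_neighbor_connectivity! fills it (standalone sketch):
left_neighbors_periodic = [4 1 2 3]   # cell 1 wraps around to cell 4
left_neighbors_bounded = [0 1 2 3]    # 0 marks "handled by boundary conditions"
# In the periodic case every element has left_element > 0, so all four
# interface fluxes are computed above; in the bounded case the first interface
# is skipped and filled in by the boundary treatment that follows.
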
# TODO: Taal dimension agnostic function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionPeriodic, - mesh::StructuredMesh{1}, equations, surface_integral, dg::DG) - @assert isperiodic(mesh) + mesh::StructuredMesh{1}, equations, surface_integral, + dg::DG) + @assert isperiodic(mesh) end function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, - mesh::StructuredMesh{1}, equations, surface_integral, dg::DG) - @unpack surface_flux = surface_integral - @unpack surface_flux_values, node_coordinates = cache.elements + mesh::StructuredMesh{1}, equations, surface_integral, + dg::DG) + @unpack surface_flux = surface_integral + @unpack surface_flux_values, node_coordinates = cache.elements - orientation = 1 + orientation = 1 - # Negative x-direction - direction = 1 + # Negative x-direction + direction = 1 - u_rr = get_node_vars(u, equations, dg, 1, 1) - x = get_node_coords(node_coordinates, equations, dg, 1, 1) + u_rr = get_node_vars(u, equations, dg, 1, 1) + x = get_node_coords(node_coordinates, equations, dg, 1, 1) - flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, surface_flux, equations) + flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, + surface_flux, equations) - for v in eachvariable(equations) - surface_flux_values[v, direction, 1] = flux[v] - end + for v in eachvariable(equations) + surface_flux_values[v, direction, 1] = flux[v] + end - # Positive x-direction - direction = 2 + # Positive x-direction + direction = 2 - u_rr = get_node_vars(u, equations, dg, nnodes(dg), nelements(dg, cache)) - x = get_node_coords(node_coordinates, equations, dg, nnodes(dg), nelements(dg, cache)) + u_rr = get_node_vars(u, equations, dg, nnodes(dg), nelements(dg, cache)) + x = get_node_coords(node_coordinates, equations, dg, nnodes(dg), + nelements(dg, cache)) - flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, surface_flux, equations) + flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, + surface_flux, equations) - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, direction, nelements(dg, cache)] = flux[v] - end + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, direction, nelements(dg, cache)] = flux[v] + end end - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index a8972dfe766..c013bf62d98 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -3,413 +3,465 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function rhs!(du, u, t, mesh::StructuredMesh{2}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, u, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, u, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, u, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg) + end -@inline function weak_form_kernel!(du, u, - element, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, - nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. 
- @unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements - - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - flux1 = flux(u_node, 1, equations) - flux2 = flux(u_node, 2, equations) - - # Compute the contravariant flux by taking the scalar product of the - # first contravariant vector Ja^1 and the flux vector - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], contravariant_flux1, equations, dg, ii, j, element) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) end - # Compute the contravariant flux by taking the scalar product of the - # second contravariant vector Ja^2 and the flux vector - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], contravariant_flux2, equations, dg, i, jj, element) + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) end - end - return nothing + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing end +@inline function weak_form_kernel!(du, u, + element, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, + nonconservative_terms::False, equations, + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. 
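
# Standalone illustration of the `alpha = true` trick above and of the
# contravariant flux formed below (made-up values):
@assert true * 3.7 === 3.7    # `true` is an exact multiplicative identity
flux1, flux2 = 1.0, 2.0       # physical fluxes f^1, f^2 at one node
Ja11, Ja12 = 0.5, 0.0         # components of the first contravariant vector Ja^1
contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2   # scalar product Ja^1 ⋅ (f^1, f^2)
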
+ @unpack derivative_dhat = dg.basis + @unpack contravariant_vectors = cache.elements + + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + flux1 = flux(u_node, 1, equations) + flux2 = flux(u_node, 2, equations) + + # Compute the contravariant flux by taking the scalar product of the + # first contravariant vector Ja^1 and the flux vector + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) + contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], + contravariant_flux1, equations, dg, ii, j, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # second contravariant vector Ja^2 and the flux vector + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], + contravariant_flux2, equations, dg, i, jj, + element) + end + end + + return nothing +end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + element, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2} + }, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Calculate volume integral in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. 
- - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, equations, dg, ii, j, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, equations, dg, i, jj, element) + # Calculate volume integral in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
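+        # Illustrative sketch of the triangular loops below (notation assumed):
+        # for a symmetric two-point flux fstar(u_i, u_k) == fstar(u_k, u_i),
+        # the full update
+        #     du_i += derivative_split[i, k] * fstar(u_i, u_k)   for all k
+        # is evaluated for k > i only, adding derivative_split[i, k] * fstar to
+        # node i and derivative_split[k, i] * fstar to node k in the same pass,
+        # halving the number of two-point flux evaluations.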
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, + element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, + equations, dg, ii, j, element) + end + + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, + element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, + equations, dg, i, jj, element) + end end - end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + element, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2} + }, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # Thus, we need to pass both to the nonconservative flux. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # Compute the contravariant nonconservative flux. 
- fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * fluxtilde1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * fluxtilde2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, element) - end + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # Thus, we need to pass both to the nonconservative flux. + + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, + element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # Compute the contravariant nonconservative flux. + fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * fluxtilde1 + end + + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, + element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * fluxtilde2 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, element) + end end - # Computing the normal vector for the FV method on curvilinear subcells. 
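+# Illustrative sketch of the construction below (notation assumed): along each
+# line of nodes, the subcell interface normal is accumulated as
+#     n_i = Ja^1_(1,j) + sum_{k=1}^{i-1} w_k * sum_m D[k, m] * Ja^1_(m,j)
+# from the contravariant vector Ja^1 at the first node, the quadrature weights
+# w, and the derivative matrix D, matching the `normal_direction +=` loops below.
+# Computing the normal vector for the FV method on curvilinear subcells.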
# To fulfill free-stream preservation we use the explicit formula B.53 in Appendix B.4 # by Hennemann, Rueda-Ramirez, Hindenlang, Gassner (2020) # "A provably entropy stable subcell shock capturing approach for high order split form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis + @unpack contravariant_vectors = cache.elements + @unpack weights, derivative_matrix = dg.basis - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) - for j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, element) + for j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, + element) - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) - for m in 1:nnodes(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j) - set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j) + set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j) + set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j) + end end - end - - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - for i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, element) - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + 
fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) - for m in 1:nnodes(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, element) - end - - # Compute the contravariant flux by taking the scalar product of the - # normal vector and the flux vector - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - - set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j) - set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j) + for i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, + element) + + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + for m in 1:nnodes(dg) + normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, element) + end + + # Compute the contravariant flux by taking the scalar product of the + # normal vector and the flux vector + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + + set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j) + set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j) + end end - end - return nothing + return nothing end # Calculate the finite volume fluxes inside curvilinear elements (**with non-conservative terms**). -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u::AbstractArray{<:Any,4}, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, + u::AbstractArray{<:Any, 4}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis - - volume_flux, nonconservative_flux = volume_flux_fv - - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) - - for j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, element) - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - for m in eachnode(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, element) - end - - # Compute the conservative part of the contravariant flux - ftilde1 = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde1_L = ftilde1 + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde1_R = ftilde1 + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar1_L, ftilde1_L, equations, dg, i, j) - set_node_vars!(fstar1_R, ftilde1_R, equations, dg, i, j) + @unpack contravariant_vectors = 
cache.elements + @unpack weights, derivative_matrix = dg.basis + + volume_flux, nonconservative_flux = volume_flux_fv + + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) + + for j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, + element) + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + for m in eachnode(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, element) + end + + # Compute the conservative part of the contravariant flux + ftilde1 = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde1_L = ftilde1 + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde1_R = ftilde1 + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar1_L, ftilde1_L, equations, dg, i, j) + set_node_vars!(fstar1_R, ftilde1_R, equations, dg, i, j) + end end - end - - # Fluxes in y - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - # Compute inner fluxes - for i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, element) - - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - for m in eachnode(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, element) - end - - # Compute the conservative part of the contravariant flux - ftilde2 = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde2_L = ftilde2 + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde2_R = ftilde2 + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar2_L, ftilde2_L, equations, dg, i, j) - set_node_vars!(fstar2_R, ftilde2_R, equations, dg, i, j) + + # Fluxes in y + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) + + # Compute inner fluxes + for i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, + element) + + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + for m in eachnode(dg) + 
normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, element) + end + + # Compute the conservative part of the contravariant flux + ftilde2 = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde2_L = ftilde2 + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde2_R = ftilde2 + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar2_L, ftilde2_L, equations, dg, i, j) + set_node_vars!(fstar2_R, ftilde2_R, equations, dg, i, j) + end end - end - return nothing + return nothing end - function calc_interface_flux!(cache, u, mesh::StructuredMesh{2}, nonconservative_terms, # can be True/False equations, surface_integral, dg::DG) - @unpack elements = cache - - @threaded for element in eachelement(dg, cache) - # Interfaces in negative directions - # Faster version of "for orientation in (1, 2)" - - # Interfaces in x-direction (`orientation` = 1) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[1, element], - element, 1, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - - # Interfaces in y-direction (`orientation` = 2) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[2, element], - element, 2, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - end - - return nothing -end + @unpack elements = cache + + @threaded for element in eachelement(dg, cache) + # Interfaces in negative directions + # Faster version of "for orientation in (1, 2)" + + # Interfaces in x-direction (`orientation` = 1) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[1, element], + element, 1, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + + # Interfaces in y-direction (`orientation` = 2) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[2, element], + element, 2, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + end + return nothing +end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, orientation, u, mesh::StructuredMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - # This is slow for LSA, but for some reason faster for Euler (see #519) - if left_element <= 0 # left_element = 0 at boundaries - return nothing - end - - @unpack surface_flux = surface_integral - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). 
- sign_jacobian = sign(inverse_jacobian[1, i, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, right_element) - else # orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, right_element) + # This is slow for LSA, but for some reason faster for Euler (see #519) + if left_element <= 0 # left_element = 0 at boundaries + return nothing end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. - flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + @unpack surface_flux = surface_integral + @unpack contravariant_vectors, inverse_jacobian = cache.elements - for v in eachvariable(equations) - surface_flux_values[v, i, right_direction, left_element] = flux[v] - surface_flux_values[v, i, left_direction, right_element] = flux[v] + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[1, i, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, right_element) + else # orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. 
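+        # In short (restating the two comments above): with sign_jacobian in
+        # (-1, +1), the normal is flipped so that upwinding sees the direction
+        # from `left_element` to `right_element`, and the flux is flipped back
+        # so it is stored with respect to the original contravariant direction:
+        #     flux = sign_jacobian * surface_flux(u_ll, u_rr, sign_jacobian * Ja, equations)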
+ flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + for v in eachvariable(equations) + surface_flux_values[v, i, right_direction, left_element] = flux[v] + surface_flux_values[v, i, left_direction, right_element] = flux[v] + end end - end - return nothing + return nothing end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, @@ -417,148 +469,162 @@ end mesh::StructuredMesh{2}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - # See comment on `calc_interface_flux!` with `nonconservative_terms::False` - if left_element <= 0 # left_element = 0 at boundaries - return nothing - end - - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[1, i, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, right_element) - else # orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, right_element) + # See comment on `calc_interface_flux!` with `nonconservative_terms::False` + if left_element <= 0 # left_element = 0 at boundaries + return nothing end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. - flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. 
- # Scale with sign_jacobian to ensure that the normal_direction matches that - # from the flux above - noncons_left = sign_jacobian * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_right = sign_jacobian * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, right_direction, left_element] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, left_direction, right_element] = flux[v] + 0.5 * noncons_right[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[1, i, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, right_element) + else # orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. 
+ # Scale with sign_jacobian to ensure that the normal_direction matches that + # from the flux above + noncons_left = sign_jacobian * + nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_right = sign_jacobian * + nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, right_direction, left_element] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, left_direction, right_element] = flux[v] + + 0.5 * + noncons_right[v] + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionPeriodic, - mesh::StructuredMesh{2}, equations, surface_integral, dg::DG) - @assert isperiodic(mesh) + mesh::StructuredMesh{2}, equations, surface_integral, + dg::DG) + @assert isperiodic(mesh) end function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, - mesh::StructuredMesh{2}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - linear_indices = LinearIndices(size(mesh)) - - for cell_y in axes(mesh, 2) - # Negative x-direction - direction = 1 - element = linear_indices[begin, cell_y] - - for j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (1, j), (j,), element) - end - - # Positive x-direction - direction = 2 - element = linear_indices[end, cell_y] - - for j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (nnodes(dg), j), (j,), element) - end - end - - for cell_x in axes(mesh, 1) - # Negative y-direction - direction = 3 - element = linear_indices[cell_x, begin] - - for i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, 1), (i,), element) + mesh::StructuredMesh{2}, equations, surface_integral, + dg::DG) + @unpack surface_flux_values = cache.elements + linear_indices = LinearIndices(size(mesh)) + + for cell_y in axes(mesh, 2) + # Negative x-direction + direction = 1 + element = linear_indices[begin, cell_y] + + for j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (1, j), (j,), element) + end + + # Positive x-direction + direction = 2 + element = linear_indices[end, cell_y] + + for j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (nnodes(dg), j), (j,), element) + end end - # Positive y-direction - direction = 4 - element = linear_indices[cell_x, end] - - for i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, nnodes(dg)), (i,), element) + for cell_x in axes(mesh, 1) + # Negative y-direction + direction = 3 + element = linear_indices[cell_x, begin] + + for i in eachnode(dg) + 
calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, 1), (i,), element) + end + + # Positive y-direction + direction = 4 + element = linear_indices[cell_x, end] + + for i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, nnodes(dg)), (i,), element) + end end - end end - function apply_jacobian!(du, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2 + } + }, equations, dg::DG, cache) - @unpack inverse_jacobian = cache.elements + @unpack inverse_jacobian = cache.elements - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - factor = -inverse_jacobian[i, j, element] + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + factor = -inverse_jacobian[i, j, element] - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl index c17c4d5923f..43f70da4750 100644 --- a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl @@ -14,477 +14,490 @@ # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 )) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). 
- du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] - contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[j, i, 1] + contravariant_vectors_x[j, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[j, i, 2] + contravariant_vectors_x[j, ii, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. 
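+    # (Standard gas dynamics, restated for clarity rather than changed): with
+    # conserved variables (rho, rho_v1, rho_v2, rho_e), the pressure follows from
+    #     p = (gamma - 1) * (rho_e - (rho_v1 * v1 + rho_v2 * v2) / 2),
+    # which is exactly the expression evaluated inside the @turbo loop below.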
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p end - end - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. - contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] - contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] - end - - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, 1] + contravariant_vectors_y[i, jj, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, 2] + contravariant_vectors_y[i, jj, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] end - end + fill!(du_permuted, zero(eltype(du_permuted))) + # We must also permute the contravariant vectors. 
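+    # Illustrative layout note (assumed reasoning): the x-direction sweep below
+    # vectorizes over `j`, so `j` must be the first, contiguous index. Data
+    # stored as a[i, j, v] is therefore transposed into a_permuted[j, i, v],
+    # as in `u_prim_permuted[j, i, v] = u_prim[i, j, v]` above.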
+ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end -end + @turbo for j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] + contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] + end + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[j, i, 1] + + contravariant_vectors_x[j, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[j, i, 2] + + contravariant_vectors_x[j, ii, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end + end + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + du[i, j, v] = du_permuted[j, i, v] + end + + # y direction + # We must also permute the contravariant vectors. + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] + contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] + end + + # The memory layout is already optimal for SIMD vectorization in this loop. 
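+    # Illustrative note (assumed reasoning): the node pairs (j, jj) live in the
+    # outer loop while the SIMD lanes run over `i`, the first and contiguous
+    # index of u_prim[i, j, v] and du[i, j, v], so no permuted copies are needed.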
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, 1] + + contravariant_vectors_y[i, jj, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, 2] + + contravariant_vectors_y[i, jj, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end + end + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, element] += du[i, j, v] + end +end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. 
- u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - u_prim[i, j, 5] = log(rho) - u_prim[i, j, 6] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] - contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - log_rho_ll = u_prim_permuted[j, i, 5] - log_p_ll = u_prim_permuted[j, i, 6] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - log_rho_rr = u_prim_permuted[j, ii, 5] - log_p_rr = u_prim_permuted[j, ii, 6] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[j, i, 1] + contravariant_vectors_x[j, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[j, i, 2] + contravariant_vectors_x[j, ii, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p + u_prim[i, j, 5] = log(rho) + u_prim[i, j, 6] = log(p) end - end - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. 
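+    # Illustrative sketch of the log-mean machinery used in this kernel
+    # (restating the existing code, nothing new): precomputing log(rho) and
+    # log(p) once per node lets every two-point logarithmic mean reuse sums of
+    # logs, e.g. log(rho_ll * p_rr) becomes log_rho_ll + log_p_rr. The inlined
+    # mean itself uses the stable split with z = (y - x)^2 / (x + y)^2:
+    #     ln_mean(x, y) ≈ (x + y) / (2 + z * (2/3 + z * (2/5 + 2/7 * z)))   if z < 1.0e-4
+    #     ln_mean(x, y) = (y - x) / (log(y) - log(x))                      otherwise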
- contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] - contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] - end - - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - log_rho_ll = u_prim[i, j, 5] - log_p_ll = u_prim[i, j, 6] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - log_rho_rr = u_prim[i, jj, 5] - log_p_rr = u_prim[i, jj, 6] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, 1] + contravariant_vectors_y[i, jj, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, 2] + contravariant_vectors_y[i, jj, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). 
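+    # An illustrative note (a sketch of the data layout, not extra work done
+    # by the kernel): Julia arrays are column-major, so `u_prim[i, j, v]` is
+    # contiguous in `i`. The transposed copy `u_prim_permuted[j, i, v]`
+    # created below lets the innermost `@turbo` loops over `j` access
+    # consecutive memory locations instead of strided ones.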
+ du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] + end + fill!(du_permuted, zero(eltype(du_permuted))) + + # We must also permute the contravariant vectors. + contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] + contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] + end + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + log_rho_ll = u_prim_permuted[j, i, 5] + log_p_ll = u_prim_permuted[j, i, 6] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + log_rho_rr = u_prim_permuted[j, ii, 5] + log_p_rr = u_prim_permuted[j, ii, 6] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[j, i, 1] + + contravariant_vectors_x[j, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[j, i, 2] + + contravariant_vectors_x[j, ii, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end end - end + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end + du[i, j, v] = du_permuted[j, i, v] + end + + # y direction + # We must also permute the contravariant vectors. + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] + contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] + end + + # The memory layout is already optimal for SIMD vectorization in this loop. 
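+    # As in the x direction above, the logarithmic means inside the loop
+    # below are evaluated branch-free: both the series expansion
+    # (`special_path`, used for nearly equal arguments, z < 1.0e-4) and the
+    # `regular_path` based on the precomputed logarithms are computed, and
+    # `ifelse` selects between them. This keeps the loop body free of
+    # branches so that LoopVectorization.jl can vectorize it.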
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + log_rho_ll = u_prim[i, j, 5] + log_p_ll = u_prim[i, j, 6] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + log_rho_rr = u_prim[i, jj, 5] + log_p_rr = u_prim[i, jj, 6] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, 1] + + contravariant_vectors_y[i, jj, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, 2] + + contravariant_vectors_y[i, jj, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end + end + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, element] += du[i, j, v] + end end diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 6c27e206321..0e6bf8a2ac0 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -3,525 +3,592 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function rhs!(du, u, t, mesh::StructuredMesh{3}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, u, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, u, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, u, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg) + end -@inline function weak_form_kernel!(du, u, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, - nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. 
- @unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements - - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - flux1 = flux(u_node, 1, equations) - flux2 = flux(u_node, 2, equations) - flux3 = flux(u_node, 3, equations) - - # Compute the contravariant flux by taking the scalar product of the - # first contravariant vector Ja^1 and the flux vector - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3 - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], contravariant_flux1, equations, dg, ii, j, k, element) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) end - # Compute the contravariant flux by taking the scalar product of the - # second contravariant vector Ja^2 and the flux vector - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3 - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], contravariant_flux2, equations, dg, i, jj, k, element) + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) end - # Compute the contravariant flux by taking the scalar product of the - # third contravariant vector Ja^3 and the flux vector - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3 - for kk in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], contravariant_flux3, equations, dg, i, j, kk, element) + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) end - end - return nothing + return nothing end +@inline function weak_form_kernel!(du, u, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + nonconservative_terms::False, equations, + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. 
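+    # A small illustration of the `alpha = true` default: in Julia, `true`
+    # is an exact multiplicative identity, e.g. `true * 0.25 === 0.25`, so
+    # the unscaled kernel performs no additional floating point work, while
+    # passing a `Real` blending coefficient as `alpha` still scales the
+    # contribution of this kernel.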
+ @unpack derivative_dhat = dg.basis + @unpack contravariant_vectors = cache.elements + + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + flux1 = flux(u_node, 1, equations) + flux2 = flux(u_node, 2, equations) + flux3 = flux(u_node, 3, equations) + + # Compute the contravariant flux by taking the scalar product of the + # first contravariant vector Ja^1 and the flux vector + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, + element) + contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3 + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], + contravariant_flux1, equations, dg, ii, j, k, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # second contravariant vector Ja^2 and the flux vector + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, + element) + contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3 + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], + contravariant_flux2, equations, dg, i, jj, k, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # third contravariant vector Ja^3 and the flux vector + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, + element) + contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3 + for kk in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], + contravariant_flux3, equations, dg, i, j, kk, + element) + end + end + + return nothing +end # flux differencing volume integral on curvilinear hexahedral elements. Averaging of the # mapping terms, stored in `contravariant_vectors`, is peeled apart from the evaluation of # the physical fluxes in each Cartesian direction @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Calculate volume integral in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. 
- - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, - ii, j, k, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, equations, dg, ii, j, k, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, - i, jj, k, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, equations, dg, i, jj, k, element) - end - - # z direction - for kk in (k+1):nnodes(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - # pull the contravariant vectors and compute the average - Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, - i, j, kk, element) - Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], fluxtilde3, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], fluxtilde3, equations, dg, i, j, kk, element) + # Calculate volume integral in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) + Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
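+        # For N = nnodes(dg), this reduces the work per coordinate line from
+        # N^2 to N * (N - 1) / 2 two-point flux evaluations: the diagonal is
+        # skipped and each flux is computed once but added to both nodes,
+        # schematically
+        #     du[i]  += alpha * derivative_split[i, ii] * fluxtilde
+        #     du[ii] += alpha * derivative_split[ii, i] * fluxtilde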
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, + ii, j, k, element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, + equations, dg, ii, j, k, element) + end + + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, + i, jj, k, element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, + equations, dg, i, jj, k, element) + end + + # z direction + for kk in (k + 1):nnodes(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + # pull the contravariant vectors and compute the average + Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, + i, j, kk, element) + Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], fluxtilde3, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], fluxtilde3, + equations, dg, i, j, kk, element) + end end - end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # Thus, we need to pass both to the nonconservative flux. 
- - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * fluxtilde1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * fluxtilde2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # z direction - for kk in eachnode(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - # pull the contravariant vectors and compute the average - Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, i, j, kk, element) - Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde3 = nonconservative_flux(u_node, u_node_kk, Ja3_node, Ja3_avg, equations) - integral_contribution = integral_contribution + derivative_split[k, kk] * fluxtilde3 + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) + Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # Thus, we need to pass both to the nonconservative flux. 
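+        # Schematically, the nonconservative coupling below is evaluated as
+        #     nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg,
+        #                          equations)
+        # with the local normal `Ja1_node` and the averaged normal `Ja1_avg`
+        # passed as separate arguments.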
+ + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k, + element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * fluxtilde1 + end + + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k, + element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * fluxtilde2 + end + + # z direction + for kk in eachnode(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + # pull the contravariant vectors and compute the average + Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, i, j, kk, + element) + Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde3 = nonconservative_flux(u_node, u_node_kk, Ja3_node, Ja3_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[k, kk] * fluxtilde3 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, k, element) end - - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, k, element) - end end - # Computing the normal vector for the FV method on curvilinear subcells. 
# To fulfill free-stream preservation we use the explicit formula B.53 in Appendix B.4 # by Hennemann, Rueda-Ramirez, Hindenlang, Gassner (2020) # "A provably entropy stable subcell shock capturing approach for high order split form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis + @unpack contravariant_vectors = cache.elements + @unpack weights, derivative_matrix = dg.basis - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) - for k in eachnode(dg), j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, element) + for k in eachnode(dg), j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, + element) - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in 1:nnodes(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, k, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, k, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j, k) - set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j, k) + end end - end - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) + fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1, :] 
.= zero(eltype(fstar2_R)) - for k in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, element) + for k in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, + element) - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in 1:nnodes(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, k, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, k, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j, k) - set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j, k) + end end - end - fstar3_L[:, :, :, 1 ] .= zero(eltype(fstar3_L)) - fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L)) - fstar3_R[:, :, :, 1 ] .= zero(eltype(fstar3_R)) - fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R)) + fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L)) + fstar3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_L)) + fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R)) + fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R)) - for j in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, element) + for j in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, + element) - for k in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j, k-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + for k in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in 1:nnodes(dg) - normal_direction += weights[k-1] * derivative_matrix[k-1, m] * get_contravariant_vector(3, contravariant_vectors, i, j, m, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[k - 1] * derivative_matrix[k - 1, m] * + get_contravariant_vector(3, contravariant_vectors, + i, j, m, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar3_L, contravariant_flux, equations, dg, i, j, k) - set_node_vars!(fstar3_R, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar3_L, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar3_R, contravariant_flux, equations, dg, i, j, k) + end end - end - return nothing + return nothing end # # Calculate the finite volume fluxes inside curvilinear elements (**with non-conservative terms**). 
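+# As in the conservative version above, the subcell interface normals below
+# are built incrementally following formula B.53: starting from the
+# contravariant vector at the first node, each interface adds
+#     normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] *
+#                         get_contravariant_vector(1, contravariant_vectors,
+#                                                  m, j, k, element)
+# summed over all nodes m, which is required for free-stream preservation.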
-@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::True, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis - - volume_flux, nonconservative_flux = volume_flux_fv - - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) - - for k in eachnode(dg), j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, element) - - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + @unpack contravariant_vectors = cache.elements + @unpack weights, derivative_matrix = dg.basis - for m in eachnode(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, k, element) - end + volume_flux, nonconservative_flux = volume_flux_fv - # Compute the contravariant conservative flux - ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde_L = ftilde + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde_R = ftilde + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar1_L, ftilde_L, equations, dg, i, j, k) - set_node_vars!(fstar1_R, ftilde_R, equations, dg, i, j, k) + for k in eachnode(dg), j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, + element) + + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + for m in eachnode(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, k, element) + end + + # Compute the contravariant conservative flux + ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde_L = ftilde + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde_R = ftilde + + 0.5 * 
nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar1_L, ftilde_L, equations, dg, i, j, k) + set_node_vars!(fstar1_R, ftilde_R, equations, dg, i, j, k) + end end - end - - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) - - for k in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, element) - - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in eachnode(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, k, element) - end + fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_R)) - # Compute the contravariant conservative flux - ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde_L = ftilde + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde_R = ftilde + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar2_L, ftilde_L, equations, dg, i, j, k) - set_node_vars!(fstar2_R, ftilde_R, equations, dg, i, j, k) + for k in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, + element) + + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + for m in eachnode(dg) + normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, k, element) + end + + # Compute the contravariant conservative flux + ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde_L = ftilde + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde_R = ftilde + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar2_L, ftilde_L, equations, dg, i, j, k) + set_node_vars!(fstar2_R, ftilde_R, equations, dg, i, j, k) + end end - end - - fstar3_L[:, :, :, 1 ] .= zero(eltype(fstar3_L)) - fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L)) - fstar3_R[:, :, :, 1 ] .= zero(eltype(fstar3_R)) - fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R)) - - for j in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, element) - for k in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j, k-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L)) + fstar3_L[:, :, :, nnodes(dg) + 1] .= 
zero(eltype(fstar3_L)) + fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R)) + fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R)) - for m in eachnode(dg) - normal_direction += weights[k-1] * derivative_matrix[k-1, m] * get_contravariant_vector(3, contravariant_vectors, i, j, m, element) - end - - # Compute the contravariant conservative flux - ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde_L = ftilde + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde_R = ftilde + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar3_L, ftilde_L, equations, dg, i, j, k) - set_node_vars!(fstar3_R, ftilde_R, equations, dg, i, j, k) + for j in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, + element) + + for k in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + for m in eachnode(dg) + normal_direction += weights[k - 1] * derivative_matrix[k - 1, m] * + get_contravariant_vector(3, contravariant_vectors, + i, j, m, element) + end + + # Compute the contravariant conservative flux + ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde_L = ftilde + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde_R = ftilde + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar3_L, ftilde_L, equations, dg, i, j, k) + set_node_vars!(fstar3_R, ftilde_R, equations, dg, i, j, k) + end end - end - return nothing + return nothing end - function calc_interface_flux!(cache, u, mesh::StructuredMesh{3}, nonconservative_terms, # can be True/False equations, surface_integral, dg::DG) - @unpack elements = cache - - @threaded for element in eachelement(dg, cache) - # Interfaces in negative directions - # Faster version of "for orientation in (1, 2, 3)" - - # Interfaces in x-direction (`orientation` = 1) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[1, element], - element, 1, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - - # Interfaces in y-direction (`orientation` = 2) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[2, element], - element, 2, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - - # Interfaces in z-direction (`orientation` = 3) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[3, element], - element, 3, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - end - - return nothing -end + @unpack elements = cache + + @threaded for element in eachelement(dg, cache) + # Interfaces in negative directions + # Faster version of "for orientation in (1, 2, 3)" + + # Interfaces in x-direction (`orientation` = 1) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[1, element], + element, 1, u, 
mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + + # Interfaces in y-direction (`orientation` = 2) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[2, element], + element, 2, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + + # Interfaces in z-direction (`orientation` = 3) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[3, element], + element, 3, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + end + return nothing +end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, orientation, u, mesh::StructuredMesh{3}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - # This is slow for LSA, but for some reason faster for Euler (see #519) - if left_element <= 0 # left_element = 0 at boundaries - return surface_flux_values - end - - @unpack surface_flux = surface_integral - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for j in eachnode(dg), i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, j, right_element) - elseif orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, j, right_element) - else # orientation == 3 - u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) - - # Third contravariant vector Ja^3 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(3, contravariant_vectors, - i, j, 1, right_element) + # This is slow for LSA, but for some reason faster for Euler (see #519) + if left_element <= 0 # left_element = 0 at boundaries + return surface_flux_values end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. 
- flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + @unpack surface_flux = surface_integral + @unpack contravariant_vectors, inverse_jacobian = cache.elements - for v in eachvariable(equations) - surface_flux_values[v, i, j, right_direction, left_element] = flux[v] - surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for j in eachnode(dg), i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, j, right_element) + elseif orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, j, right_element) + else # orientation == 3 + u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) + + # Third contravariant vector Ja^3 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(3, contravariant_vectors, + i, j, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + for v in eachvariable(equations) + surface_flux_values[v, i, j, right_direction, left_element] = flux[v] + surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + end end - end - return nothing + return nothing end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, @@ -529,181 +596,198 @@ end mesh::StructuredMesh{3}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - # See comment on `calc_interface_flux!` with `nonconservative_terms::False` - if left_element <= 0 # left_element = 0 at boundaries - return surface_flux_values - end - - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for j in eachnode(dg), i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. 
The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, j, right_element) - elseif orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, j, right_element) - else # orientation == 3 - u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) - - # Third contravariant vector Ja^3 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(3, contravariant_vectors, - i, j, 1, right_element) + # See comment on `calc_interface_flux!` with `nonconservative_terms::False` + if left_element <= 0 # left_element = 0 at boundaries + return surface_flux_values end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. - flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. - # Scale with sign_jacobian to ensure that the normal_direction matches that - # from the flux above - noncons_left = sign_jacobian * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_right = sign_jacobian * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, j, right_direction, left_element] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + 0.5 * noncons_right[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for j in eachnode(dg), i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). 
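+            # A sketch of the sign handling below: for an orientation-reversing
+            # mapping, `sign_jacobian == -1` flips `normal_direction` so that it
+            # points from `left_element` to `right_element`; multiplying the
+            # surface flux by the same factor afterwards restores the physical
+            # flux in the normal direction.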
+ sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, j, right_element) + elseif orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, j, right_element) + else # orientation == 3 + u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) + + # Third contravariant vector Ja^3 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(3, contravariant_vectors, + i, j, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. + # Scale with sign_jacobian to ensure that the normal_direction matches that + # from the flux above + noncons_left = sign_jacobian * + nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_right = sign_jacobian * + nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, j, right_direction, left_element] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + + 0.5 * + noncons_right[v] + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionPeriodic, - mesh::StructuredMesh{3}, equations, surface_integral, dg::DG) - @assert isperiodic(mesh) + mesh::StructuredMesh{3}, equations, surface_integral, + dg::DG) + @assert isperiodic(mesh) end function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, - mesh::StructuredMesh{3}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - linear_indices = LinearIndices(size(mesh)) - - for cell_z in axes(mesh, 3), cell_y in axes(mesh, 2) - # Negative x-direction - direction = 1 - element = linear_indices[begin, cell_y, cell_z] - - for k in eachnode(dg), j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (1, j, k), (j, k), element) + mesh::StructuredMesh{3}, equations, surface_integral, + dg::DG) + @unpack surface_flux_values = cache.elements + linear_indices = LinearIndices(size(mesh)) + + for cell_z in axes(mesh, 3), cell_y in 
axes(mesh, 2) + # Negative x-direction + direction = 1 + element = linear_indices[begin, cell_y, cell_z] + + for k in eachnode(dg), j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (1, j, k), (j, k), element) + end + + # Positive x-direction + direction = 2 + element = linear_indices[end, cell_y, cell_z] + + for k in eachnode(dg), j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (nnodes(dg), j, k), (j, k), + element) + end end - # Positive x-direction - direction = 2 - element = linear_indices[end, cell_y, cell_z] - - for k in eachnode(dg), j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (nnodes(dg), j, k), (j, k), element) + for cell_z in axes(mesh, 3), cell_x in axes(mesh, 1) + # Negative y-direction + direction = 3 + element = linear_indices[cell_x, begin, cell_z] + + for k in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, 1, k), (i, k), element) + end + + # Positive y-direction + direction = 4 + element = linear_indices[cell_x, end, cell_z] + + for k in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, nnodes(dg), k), (i, k), + element) + end end - end - - for cell_z in axes(mesh, 3), cell_x in axes(mesh, 1) - # Negative y-direction - direction = 3 - element = linear_indices[cell_x, begin, cell_z] - for k in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, 1, k), (i, k), element) - end - - # Positive y-direction - direction = 4 - element = linear_indices[cell_x, end, cell_z] - - for k in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, nnodes(dg), k), (i, k), element) + for cell_y in axes(mesh, 2), cell_x in axes(mesh, 1) + # Negative z-direction + direction = 5 + element = linear_indices[cell_x, cell_y, begin] + + for j in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, j, 1), (i, j), element) + end + + # Positive z-direction + direction = 6 + element = linear_indices[cell_x, cell_y, end] + + for j in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, j, nnodes(dg)), (i, j), + element) + end end - end - - for cell_y in axes(mesh, 2), cell_x in axes(mesh, 1) - # Negative z-direction - direction = 5 - element = linear_indices[cell_x, cell_y, begin] - - for j in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, 
cache, - direction, (i, j, 1), (i, j), element) - end - - # Positive z-direction - direction = 6 - element = linear_indices[cell_x, cell_y, end] - - for j in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, j, nnodes(dg)), (i, j), element) - end - end end - function apply_jacobian!(du, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - factor = -cache.elements.inverse_jacobian[i, j, k, element] - - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + factor = -cache.elements.inverse_jacobian[i, j, k, element] + + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 8caff2eff6b..64a3456b940 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -14,733 +14,774 @@ # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). 
- du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[jk, i, 1] + contravariant_vectors_x[jk, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[jk, i, 2] + contravariant_vectors_x[jk, ii, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_x[jk, i, 3] + contravariant_vectors_x[jk, ii, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + 
@unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. - contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] - end - - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. 
- for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, k, 1] + contravariant_vectors_y[i, jj, k, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, k, 2] + contravariant_vectors_y[i, jj, k, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_y[i, j, k, 3] + contravariant_vectors_y[i, jj, k, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] + end + fill!(du_permuted, zero(eltype(du_permuted))) + + # We must also permute the contravariant vectors. + contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + jk = j + nnodes(dg) * (k - 1) + contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] end - end + # Next, we basically inline the volume flux. 
To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[jk, i, 1] + + contravariant_vectors_x[jk, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[jk, i, 2] + + contravariant_vectors_x[jk, ii, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_x[jk, i, 3] + + contravariant_vectors_x[jk, ii, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end + end - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. - GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction # We must also permute the contravariant vectors. 
- contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - ij = i + nnodes(dg) * (j- 1) - contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, element] + contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] end - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_z[ij, k, 1] + contravariant_vectors_z[ij, kk, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_z[ij, k, 2] + contravariant_vectors_z[ij, kk, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_z[ij, k, 3] + contravariant_vectors_z[ij, kk, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. 
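+    # As in the x direction above, only the strictly upper-triangular pairs
+    # (j, jj) are visited: the two-point volume flux is symmetric, so each
+    # value is computed once and accumulated twice, weighted by
+    # derivative_split[j, jj] and derivative_split[jj, j]. A rough sketch of
+    # the same idea on a plain vector (hypothetical `symmetric_flux`, for
+    # illustration only):
+    #
+    #     for j in 1:n, jj in (j + 1):n
+    #         f = symmetric_flux(u[j], u[jj])
+    #         du[j] += D[j, jj] * f
+    #         du[jj] += D[jj, j] * f
+    #     end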
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, k, 1] + + contravariant_vectors_y[i, jj, k, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, k, 2] + + contravariant_vectors_y[i, jj, k, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_y[i, j, k, 3] + + contravariant_vectors_y[i, jj, k, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end end - end # GC.@preserve u_prim begin - - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. + GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + # We must also permute the contravariant vectors. 
+ contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + ij = i + nnodes(dg) * (j - 1) + contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, + element] + end + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + + normal_direction_1 = 0.5 * (contravariant_vectors_z[ij, k, 1] + + contravariant_vectors_z[ij, kk, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_z[ij, k, 2] + + contravariant_vectors_z[ij, kk, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_z[ij, k, 3] + + contravariant_vectors_z[ij, kk, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_k = alpha * derivative_split[k, kk] + du_reshaped[ij, k, 1] += factor_k * f1 + du_reshaped[ij, k, 2] += factor_k * f2 + du_reshaped[ij, k, 3] += factor_k * f3 + du_reshaped[ij, k, 4] += factor_k * f4 + du_reshaped[ij, k, 5] += factor_k * f5 + + factor_kk = alpha * derivative_split[kk, k] + du_reshaped[ij, kk, 1] += factor_kk * f1 + du_reshaped[ij, kk, 2] += factor_kk * f2 + du_reshaped[ij, kk, 3] += factor_kk * f3 + du_reshaped[ij, kk, 4] += factor_kk * f4 + du_reshaped[ij, kk, 5] += factor_kk * f5 + end + end + end # GC.@preserve u_prim begin + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. 
+ @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, k, element] += du[i, j, k, v] + end end - - @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - u_prim[i, j, k, 6] = log(rho) - u_prim[i, j, k, 7] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. 
- for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - log_rho_ll = u_prim_permuted[jk, i, 6] - log_p_ll = u_prim_permuted[jk, i, 7] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - log_rho_rr = u_prim_permuted[jk, ii, 6] - log_p_rr = u_prim_permuted[jk, ii, 7] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[jk, i, 1] + contravariant_vectors_x[jk, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[jk, i, 2] + contravariant_vectors_x[jk, ii, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_x[jk, i, 3] + contravariant_vectors_x[jk, ii, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = 
cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p + u_prim[i, j, k, 6] = log(rho) + u_prim[i, j, k, 7] = log(p) end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. - contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] - end - - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. 
- for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - log_rho_ll = u_prim[i, j, k, 6] - log_p_ll = u_prim[i, j, k, 7] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - log_rho_rr = u_prim[i, jj, k, 6] - log_p_rr = u_prim[i, jj, k, 7] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, k, 1] + contravariant_vectors_y[i, jj, k, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, k, 2] + contravariant_vectors_y[i, jj, k, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_y[i, j, k, 3] + contravariant_vectors_y[i, jj, k, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). 
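+    # In contrast to the kernel above, the permuted primitive array reserves
+    # nvariables(equations) + 2 slots per node, so the precomputed log(rho)
+    # and log(p) are carried through the same permutation as the other
+    # primitive variables.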
+ du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] end - end + fill!(du_permuted, zero(eltype(du_permuted))) + # We must also permute the contravariant vectors. + contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. - GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + jk = j + nnodes(dg) * (k - 1) + contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] + end - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + log_rho_ll = u_prim_permuted[jk, i, 6] + log_p_ll = u_prim_permuted[jk, i, 7] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + log_rho_rr = u_prim_permuted[jk, ii, 6] + log_p_rr = u_prim_permuted[jk, ii, 7] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[jk, i, 1] + + contravariant_vectors_x[jk, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[jk, i, 2] + + contravariant_vectors_x[jk, ii, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_x[jk, i, 3] + + contravariant_vectors_x[jk, ii, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end + end + + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction # We must also permute the contravariant vectors. 
- contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - ij = i + nnodes(dg) * (j- 1) - contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, element] + contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] end - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - log_rho_ll = u_prim_reshaped[ij, k, 6] - log_p_ll = u_prim_reshaped[ij, k, 7] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - log_rho_rr = u_prim_reshaped[ij, kk, 6] - log_p_rr = u_prim_reshaped[ij, kk, 7] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_z[ij, k, 1] + contravariant_vectors_z[ij, kk, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_z[ij, k, 2] + contravariant_vectors_z[ij, kk, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_z[ij, k, 3] + contravariant_vectors_z[ij, kk, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. 
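+    # For reference, a sketch of the expansion behind the inlined logarithmic
+    # mean used in these loops: with f = (y - x) / (x + y) and z = f^2,
+    #
+    #     log(y / x) = log((1 + f) / (1 - f)) = 2f * (1 + z/3 + z^2/5 + z^3/7 + O(z^4)),
+    #
+    # so the logarithmic mean satisfies
+    #
+    #     (y - x) / log(y / x) ≈ (x + y) / (2 + z * (2/3 + z * (2/5 + 2/7 * z))),
+    #
+    # which is exactly `special_path1`. It is used when z < 1.0e-4, where the
+    # direct quotient would divide two quantities that both vanish as y -> x.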
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + log_rho_ll = u_prim[i, j, k, 6] + log_p_ll = u_prim[i, j, k, 7] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + log_rho_rr = u_prim[i, jj, k, 6] + log_p_rr = u_prim[i, jj, k, 7] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, k, 1] + + contravariant_vectors_y[i, jj, k, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, k, 2] + + contravariant_vectors_y[i, jj, k, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_y[i, j, k, 3] + + contravariant_vectors_y[i, jj, k, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end end - end # GC.@preserve u_prim begin - - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. 
- @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. + GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + # We must also permute the contravariant vectors. + contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + ij = i + nnodes(dg) * (j - 1) + contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, + element] + end + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + log_rho_ll = u_prim_reshaped[ij, k, 6] + log_p_ll = u_prim_reshaped[ij, k, 7] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + log_rho_rr = u_prim_reshaped[ij, kk, 6] + log_p_rr = u_prim_reshaped[ij, kk, 7] + + normal_direction_1 = 0.5 * (contravariant_vectors_z[ij, k, 1] + + contravariant_vectors_z[ij, kk, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_z[ij, k, 2] + + contravariant_vectors_z[ij, kk, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_z[ij, k, 3] + + contravariant_vectors_z[ij, kk, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * + ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_k = alpha * derivative_split[k, kk] + du_reshaped[ij, k, 1] += factor_k * f1 + du_reshaped[ij, k, 2] += factor_k * f2 + du_reshaped[ij, k, 3] += factor_k * f3 + du_reshaped[ij, k, 4] += factor_k * f4 + du_reshaped[ij, k, 5] += factor_k * f5 + + factor_kk = alpha * derivative_split[kk, k] + du_reshaped[ij, kk, 1] += factor_kk * f1 + du_reshaped[ij, kk, 2] += factor_kk * f2 + du_reshaped[ij, kk, 3] += factor_kk * f3 + du_reshaped[ij, kk, 4] += factor_kk * f4 + du_reshaped[ij, kk, 5] += factor_kk * f5 + end + end + end # GC.@preserve u_prim begin + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, k, element] += du[i, j, k, v] + end end diff --git a/src/solvers/dgsem_structured/indicators_1d.jl b/src/solvers/dgsem_structured/indicators_1d.jl index 9bb66d34e9e..4299ec603dd 100644 --- a/src/solvers/dgsem_structured/indicators_1d.jl +++ b/src/solvers/dgsem_structured/indicators_1d.jl @@ -3,24 +3,24 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent function apply_smoothing!(mesh::StructuredMesh{1}, alpha, alpha_tmp, dg, cache) - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha + # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha + # Copy alpha values such that smoothing is independent of the element access order + alpha_tmp .= alpha - # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
- @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far" + # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes. + @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far" - # Loop over elements, because there is no interface container - for element in eachelement(dg,cache) - # Get neighboring element ids - left = cache.elements.left_neighbors[1, element] + # Loop over elements, because there is no interface container + for element in eachelement(dg, cache) + # Get neighboring element ids + left = cache.elements.left_neighbors[1, element] - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) - end + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) + end end - end # @muladd diff --git a/src/solvers/dgsem_structured/indicators_2d.jl b/src/solvers/dgsem_structured/indicators_2d.jl index abc054b572a..f4b07b70cb8 100644 --- a/src/solvers/dgsem_structured/indicators_2d.jl +++ b/src/solvers/dgsem_structured/indicators_2d.jl @@ -3,28 +3,28 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent function apply_smoothing!(mesh::StructuredMesh{2}, alpha, alpha_tmp, dg, cache) - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha + # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha + # Copy alpha values such that smoothing is independent of the element access order + alpha_tmp .= alpha - # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes. - @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far" + # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
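The smoothing rule shared by the 1D, 2D, and 3D variants of `apply_smoothing!` is easiest to see in isolation. Below is a minimal sketch for the periodic 1D case; the plain `left_neighbors` vector is a hypothetical stand-in for `cache.elements.left_neighbors[1, :]`.

    # Each element is raised to at least 50% of its neighbors' values. Copying
    # into alpha_tmp first makes the result independent of the traversal order.
    function smooth_alpha_1d!(alpha::Vector{Float64}, left_neighbors::Vector{Int})
        alpha_tmp = copy(alpha)
        for element in eachindex(alpha)
            left = left_neighbors[element]
            alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element],
                              alpha[left])
            alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left],
                                 alpha[element])
        end
        return alpha
    end

For example, `smooth_alpha_1d!([0.0, 1.0, 0.0, 0.0], [4, 1, 2, 3])` spreads the spike at element 2 to both of its neighbors, returning `[0.5, 1.0, 0.5, 0.0]`.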
+ @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far" - # Loop over elements, because there is no interface container - for element in eachelement(dg,cache) - # Get neighboring element ids - left = cache.elements.left_neighbors[1, element] - lower = cache.elements.left_neighbors[2, element] + # Loop over elements, because there is no interface container + for element in eachelement(dg, cache) + # Get neighboring element ids + left = cache.elements.left_neighbors[1, element] + lower = cache.elements.left_neighbors[2, element] - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) - alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element]) - end + alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element]) + end end - end # @muladd diff --git a/src/solvers/dgsem_structured/indicators_3d.jl b/src/solvers/dgsem_structured/indicators_3d.jl index 4d3c4df278b..155bf50dc68 100644 --- a/src/solvers/dgsem_structured/indicators_3d.jl +++ b/src/solvers/dgsem_structured/indicators_3d.jl @@ -3,32 +3,32 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent function apply_smoothing!(mesh::StructuredMesh{3}, alpha, alpha_tmp, dg, cache) - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha + # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha + # Copy alpha values such that smoothing is independent of the element access order + alpha_tmp .= alpha - # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes. - @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far" + # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
+ @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far" - # Loop over elements, because there is no interface container - for element in eachelement(dg,cache) - # Get neighboring element ids - left = cache.elements.left_neighbors[1, element] - lower = cache.elements.left_neighbors[2, element] - front = cache.elements.left_neighbors[3, element] + # Loop over elements, because there is no interface container + for element in eachelement(dg, cache) + # Get neighboring element ids + left = cache.elements.left_neighbors[1, element] + lower = cache.elements.left_neighbors[2, element] + front = cache.elements.left_neighbors[3, element] - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) - alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element]) - - alpha[front] = max(alpha_tmp[front], 0.5 * alpha_tmp[element], alpha[front]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[front], alpha[element]) - end -end + alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element]) + alpha[front] = max(alpha_tmp[front], 0.5 * alpha_tmp[element], alpha[front]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[front], alpha[element]) + end +end end # @muladd diff --git a/src/solvers/dgsem_tree/containers.jl b/src/solvers/dgsem_tree/containers.jl index 92bce0ce830..bba8b83b23a 100644 --- a/src/solvers/dgsem_tree/containers.jl +++ b/src/solvers/dgsem_tree/containers.jl @@ -3,58 +3,55 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Dimension independent code related to containers of the DG solver # with the mesh type TreeMesh function reinitialize_containers!(mesh::TreeMesh, equations, dg::DGSEM, cache) - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - # re-initialize elements container - @unpack elements = cache - resize!(elements, length(leaf_cell_ids)) - init_elements!(elements, leaf_cell_ids, mesh, dg.basis) - - # re-initialize interfaces container - @unpack interfaces = cache - resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) - init_interfaces!(interfaces, elements, mesh) - - # re-initialize boundaries container - @unpack boundaries = cache - resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) - init_boundaries!(boundaries, elements, mesh) - - # re-initialize mortars container - @unpack mortars = cache - resize!(mortars, count_required_mortars(mesh, leaf_cell_ids)) - init_mortars!(mortars, elements, mesh) - - if mpi_isparallel() - # re-initialize mpi_interfaces container - @unpack mpi_interfaces = cache - resize!(mpi_interfaces, count_required_mpi_interfaces(mesh, leaf_cell_ids)) - init_mpi_interfaces!(mpi_interfaces, elements, mesh) - - # re-initialize mpi_mortars container - @unpack mpi_mortars = cache - resize!(mpi_mortars, count_required_mpi_mortars(mesh, leaf_cell_ids)) - init_mpi_mortars!(mpi_mortars, elements, mesh) - - # re-initialize mpi cache - @unpack mpi_cache = cache - init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, - nvariables(equations), nnodes(dg), eltype(elements)) - end + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + # re-initialize elements container + @unpack elements = cache + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + + # re-initialize interfaces container + @unpack interfaces = cache + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # re-initialize mortars container + @unpack mortars = cache + resize!(mortars, count_required_mortars(mesh, leaf_cell_ids)) + init_mortars!(mortars, elements, mesh) + + if mpi_isparallel() + # re-initialize mpi_interfaces container + @unpack mpi_interfaces = cache + resize!(mpi_interfaces, count_required_mpi_interfaces(mesh, leaf_cell_ids)) + init_mpi_interfaces!(mpi_interfaces, elements, mesh) + + # re-initialize mpi_mortars container + @unpack mpi_mortars = cache + resize!(mpi_mortars, count_required_mpi_mortars(mesh, leaf_cell_ids)) + init_mpi_mortars!(mpi_mortars, elements, mesh) + + # re-initialize mpi cache + @unpack mpi_cache = cache + init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, + nvariables(equations), nnodes(dg), eltype(elements)) + end end - # Dimension-specific implementations include("containers_1d.jl") include("containers_2d.jl") include("containers_3d.jl") - - end # @muladd diff --git a/src/solvers/dgsem_tree/containers_1d.jl b/src/solvers/dgsem_tree/containers_1d.jl index f3255c6c953..ecbcc1c4d9a 100644 --- a/src/solvers/dgsem_tree/containers_1d.jl +++ b/src/solvers/dgsem_tree/containers_1d.jl @@ -3,17 +3,17 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Container data structure (structure-of-arrays style) for DG elements -mutable struct ElementContainer1D{RealT<:Real, uEltype<:Real} <: AbstractContainer - inverse_jacobian::Vector{RealT} # [elements] - node_coordinates::Array{RealT, 3} # [orientation, i, elements] - surface_flux_values::Array{uEltype, 3} # [variables, direction, elements] - cell_ids::Vector{Int} # [elements] - # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _surface_flux_values::Vector{uEltype} +mutable struct ElementContainer1D{RealT <: Real, uEltype <: Real} <: AbstractContainer + inverse_jacobian::Vector{RealT} # [elements] + node_coordinates::Array{RealT, 3} # [orientation, i, elements] + surface_flux_values::Array{uEltype, 3} # [variables, direction, elements] + cell_ids::Vector{Int} # [elements] + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _surface_flux_values::Vector{uEltype} end nvariables(elements::ElementContainer1D) = size(elements.surface_flux_values, 1) @@ -26,50 +26,50 @@ Base.eltype(elements::ElementContainer1D) = eltype(elements.surface_flux_values) # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::ElementContainer1D, capacity) - n_nodes = nnodes(elements) - n_variables = nvariables(elements) - @unpack _node_coordinates, _surface_flux_values, - inverse_jacobian, cell_ids = elements + n_nodes = nnodes(elements) + n_variables = nvariables(elements) + @unpack _node_coordinates, _surface_flux_values, + inverse_jacobian, cell_ids = elements - resize!(inverse_jacobian, capacity) + resize!(inverse_jacobian, capacity) - resize!(_node_coordinates, 1 * n_nodes * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, n_nodes, capacity)) + resize!(_node_coordinates, 1 * n_nodes * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, n_nodes, capacity)) - resize!(_surface_flux_values, n_variables * 2 * 1 * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, 2 * 1, capacity)) + resize!(_surface_flux_values, n_variables * 2 * 1 * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, 2 * 1, capacity)) - resize!(cell_ids, capacity) + resize!(cell_ids, capacity) - return nothing + return nothing end +function ElementContainer1D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function ElementContainer1D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - inverse_jacobian = fill(nan_RealT, capacity) + # Initialize fields with defaults + inverse_jacobian = fill(nan_RealT, capacity) - _node_coordinates = fill(nan_RealT, 1 * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 1 * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, n_nodes, capacity)) - _surface_flux_values = fill(nan_uEltype, n_variables * 2 * 1 * capacity) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, 2 * 1, capacity)) + _surface_flux_values = fill(nan_uEltype, n_variables * 2 * 1 * capacity) + 
surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, 2 * 1, capacity)) - cell_ids = fill(typemin(Int), capacity) + cell_ids = fill(typemin(Int), capacity) - return ElementContainer1D{RealT, uEltype}( - inverse_jacobian, node_coordinates, surface_flux_values, cell_ids, - _node_coordinates, _surface_flux_values) + return ElementContainer1D{RealT, uEltype}(inverse_jacobian, node_coordinates, + surface_flux_values, cell_ids, + _node_coordinates, _surface_flux_values) end - # Return number of elements @inline nelements(elements::ElementContainer1D) = length(elements.cell_ids) # TODO: Taal performance, 1:nelements(elements) vs. Base.OneTo(nelements(elements)) @@ -83,68 +83,68 @@ In particular, not the elements themselves are returned. @inline eachelement(elements::ElementContainer1D) = Base.OneTo(nelements(elements)) @inline Base.real(elements::ElementContainer1D) = eltype(elements.node_coordinates) - # Create element container and initialize element data function init_elements(cell_ids, mesh::TreeMesh1D, equations::AbstractEquations{1}, - basis, ::Type{RealT}, ::Type{uEltype}) where {RealT<:Real, uEltype<:Real} - # Initialize container - n_elements = length(cell_ids) - elements = ElementContainer1D{RealT, uEltype}( - n_elements, nvariables(equations), nnodes(basis)) - - init_elements!(elements, cell_ids, mesh, basis) - return elements + basis, ::Type{RealT}, + ::Type{uEltype}) where {RealT <: Real, uEltype <: Real} + # Initialize container + n_elements = length(cell_ids) + elements = ElementContainer1D{RealT, uEltype}(n_elements, nvariables(equations), + nnodes(basis)) + + init_elements!(elements, cell_ids, mesh, basis) + return elements end function init_elements!(elements, cell_ids, mesh::TreeMesh1D, basis) - nodes = get_nodes(basis) - # Compute the length of the 1D reference interval by integrating - # the function with constant value unity on the corresponding - # element data type (using \circ) - reference_length = integrate(one ∘ eltype, nodes, basis) - # Compute the offset of the midpoint of the 1D reference interval - # (its difference from zero) - reference_offset = first(nodes) + reference_length / 2 - - # Store cell ids - elements.cell_ids .= cell_ids - - # Calculate inverse Jacobian and node coordinates - for element in eachelement(elements) - # Get cell id - cell_id = cell_ids[element] - - # Get cell length - dx = length_at_cell(mesh.tree, cell_id) - - # Calculate inverse Jacobian - jacobian = dx / reference_length - elements.inverse_jacobian[element] = inv(jacobian) - - # Calculate node coordinates - # Note that the `tree_coordinates` are the midpoints of the cells. - # Hence, we need to add an offset for `nodes` with a midpoint - # different from zero. 
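All containers in this file follow the storage idiom visible in `Base.resize!` above: the true storage is a flat `Vector`, since only one-dimensional arrays are `resize!`able in Julia, and the multi-dimensional array is an `unsafe_wrap`ped view that must be rebuilt after every resize. A self-contained sketch of the same idea; the `Buf2D` type is hypothetical, not part of Trixi.jl:

    # Resizable 2D buffer backed by a flat vector. After resize!, the vector
    # may have been reallocated, so the view is re-wrapped around the new
    # pointer.
    mutable struct Buf2D{T <: AbstractFloat}
        data::Matrix{T}   # multi-dimensional view for convenient indexing
        _data::Vector{T}  # internal `resize!`able storage
    end

    function Buf2D{T}(m::Int, n::Int) where {T <: AbstractFloat}
        _data = fill(convert(T, NaN), m * n)
        data = unsafe_wrap(Array, pointer(_data), (m, n))
        return Buf2D{T}(data, _data)
    end

    function Base.resize!(buf::Buf2D, m::Int, n::Int)
        resize!(buf._data, m * n)
        buf.data = unsafe_wrap(Array, pointer(buf._data), (m, n))
        return buf
    end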
- for i in eachnode(basis) - elements.node_coordinates[1, i, element] = ( - mesh.tree.coordinates[1, cell_id] + jacobian * (nodes[i] - reference_offset)) - end - end - - return elements -end - + nodes = get_nodes(basis) + # Compute the length of the 1D reference interval by integrating + # the function with constant value unity on the corresponding + # element data type (using \circ) + reference_length = integrate(one ∘ eltype, nodes, basis) + # Compute the offset of the midpoint of the 1D reference interval + # (its difference from zero) + reference_offset = (first(nodes) + last(nodes)) / 2 + + # Store cell ids + elements.cell_ids .= cell_ids + + # Calculate inverse Jacobian and node coordinates + for element in eachelement(elements) + # Get cell id + cell_id = cell_ids[element] + + # Get cell length + dx = length_at_cell(mesh.tree, cell_id) + + # Calculate inverse Jacobian + jacobian = dx / reference_length + elements.inverse_jacobian[element] = inv(jacobian) + + # Calculate node coordinates + # Note that the `tree_coordinates` are the midpoints of the cells. + # Hence, we need to add an offset for `nodes` with a midpoint + # different from zero. + for i in eachnode(basis) + elements.node_coordinates[1, i, element] = (mesh.tree.coordinates[1, + cell_id] + + jacobian * + (nodes[i] - reference_offset)) + end + end + return elements +end # Container data structure (structure-of-arrays style) for DG interfaces -mutable struct InterfaceContainer1D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 3} # [leftright, variables, interfaces] - neighbor_ids::Matrix{Int} # [leftright, interfaces] - orientations::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct InterfaceContainer1D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 3} # [leftright, variables, interfaces] + neighbor_ids::Matrix{Int} # [leftright, interfaces] + orientations::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(interfaces::InterfaceContainer1D) = size(interfaces.u, 2) @@ -152,151 +152,148 @@ Base.eltype(interfaces::InterfaceContainer1D) = eltype(interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::InterfaceContainer1D, capacity) - n_variables = nvariables(interfaces) - @unpack _u, _neighbor_ids, orientations = interfaces + n_variables = nvariables(interfaces) + @unpack _u, _neighbor_ids, orientations = interfaces - resize!(_u, 2 * n_variables * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + resize!(_u, 2 * n_variables * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function InterfaceContainer1D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function InterfaceContainer1D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + _neighbor_ids = fill(typemin(Int), 2 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - _neighbor_ids = fill(typemin(Int), 2 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + orientations = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) - - return InterfaceContainer1D{uEltype}( - u, neighbor_ids, orientations, - _u, _neighbor_ids) + return InterfaceContainer1D{uEltype}(u, neighbor_ids, orientations, + _u, _neighbor_ids) end - # Return number of interfaces @inline ninterfaces(interfaces::InterfaceContainer1D) = length(interfaces.orientations) - # Create interface container and initialize interface data in `elements`. 
function init_interfaces(cell_ids, mesh::TreeMesh1D, elements::ElementContainer1D) - # Initialize container - n_interfaces = count_required_interfaces(mesh, cell_ids) - interfaces = InterfaceContainer1D{eltype(elements)}( - n_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_interfaces!(interfaces, elements, mesh) - return interfaces + # Initialize container + n_interfaces = count_required_interfaces(mesh, cell_ids) + interfaces = InterfaceContainer1D{eltype(elements)}(n_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_interfaces!(interfaces, elements, mesh) + return interfaces end # Count the number of interfaces that need to be created function count_required_interfaces(mesh::TreeMesh1D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # Only count interfaces in positive direction to avoid double counting - if direction == 1 - continue - end - - # Skip if no neighbor exists - if !has_any_neighbor(mesh.tree, cell_id, direction) - continue - end - - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # Only count interfaces in positive direction to avoid double counting + if direction == 1 + continue + end + + # Skip if no neighbor exists + if !has_any_neighbor(mesh.tree, cell_id, direction) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_interfaces!(interfaces, elements, mesh::TreeMesh1D) - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # Only create interfaces in positive direction - if direction == 1 - continue - end - - # Skip if no neighbor exists and current cell is not small - if !has_any_neighbor(mesh.tree, cell_id, direction) - continue - end - - count += 1 - - if has_neighbor(mesh.tree, cell_id, direction) - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor - interfaces.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - else # Cell has same refinement level neighbor - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # Only create interfaces in positive direction + if direction == 1 + continue + end + + # Skip if no neighbor exists and current cell is not small + if !has_any_neighbor(mesh.tree, cell_id, direction) + continue + end + + count += 1 + + if has_neighbor(mesh.tree, cell_id, 
direction) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor + interfaces.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + else # Cell has same refinement level neighbor + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + end + else # Cell is small and has large neighbor + parent_id = mesh.tree.parent_ids[cell_id] + neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + end + + interfaces.neighbor_ids[1, count] = element + # Set orientation (x -> 1) + interfaces.orientations[count] = 1 end - else # Cell is small and has large neighbor - parent_id = mesh.tree.parent_ids[cell_id] - neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] - end - - interfaces.neighbor_ids[1, count] = element - # Set orientation (x -> 1) - interfaces.orientations[count] = 1 end - end - @assert count == ninterfaces(interfaces) ("Actual interface count ($count) does not match " * + @assert count==ninterfaces(interfaces) ("Actual interface count ($count) does not match "* "expectations $(ninterfaces(interfaces))") end - - # Container data structure (structure-of-arrays style) for DG boundaries -mutable struct BoundaryContainer1D{RealT<:Real, uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 3} # [leftright, variables, boundaries] - neighbor_ids::Vector{Int} # [boundaries] - orientations::Vector{Int} # [boundaries] - neighbor_sides::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 2} # [orientation, elements] - n_boundaries_per_direction::SVector{2, Int} # [direction] - # internal `resize!`able storage - _u::Vector{uEltype} - _node_coordinates::Vector{RealT} +mutable struct BoundaryContainer1D{RealT <: Real, uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 3} # [leftright, variables, boundaries] + neighbor_ids::Vector{Int} # [boundaries] + orientations::Vector{Int} # [boundaries] + neighbor_sides::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 2} # [orientation, elements] + n_boundaries_per_direction::SVector{2, Int} # [direction] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_coordinates::Vector{RealT} end nvariables(boundaries::BoundaryContainer1D) = size(boundaries.u, 2) @@ -304,163 +301,162 @@ Base.eltype(boundaries::BoundaryContainer1D) = eltype(boundaries.u) # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::BoundaryContainer1D, capacity) - n_variables = nvariables(boundaries) - @unpack _u, _node_coordinates, - neighbor_ids, orientations, neighbor_sides = boundaries + n_variables = nvariables(boundaries) + @unpack _u, _node_coordinates, + neighbor_ids, orientations, neighbor_sides = boundaries - resize!(_u, 2 * n_variables * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + resize!(_u, 2 * n_variables * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - resize!(_node_coordinates, 1 * capacity) - boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, capacity)) + resize!(_node_coordinates, 1 * capacity) + boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(neighbor_sides, capacity) + resize!(neighbor_sides, capacity) - return nothing + return nothing end +function BoundaryContainer1D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function BoundaryContainer1D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan_uEltype, 2 * n_variables * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + # Initialize fields with defaults + _u = fill(nan_uEltype, 2 * n_variables * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - neighbor_ids = fill(typemin(Int), capacity) + neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - neighbor_sides = fill(typemin(Int), capacity) + neighbor_sides = fill(typemin(Int), capacity) - _node_coordinates = fill(nan_RealT, 1 * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, capacity)) + _node_coordinates = fill(nan_RealT, 1 * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, capacity)) - n_boundaries_per_direction = SVector(0, 0) + n_boundaries_per_direction = SVector(0, 0) - return BoundaryContainer1D{RealT, uEltype}( - u, neighbor_ids, orientations, neighbor_sides, - node_coordinates, n_boundaries_per_direction, - _u, _node_coordinates) + return BoundaryContainer1D{RealT, uEltype}(u, neighbor_ids, orientations, + neighbor_sides, + node_coordinates, + n_boundaries_per_direction, + _u, _node_coordinates) end - # Return number of boundaries nboundaries(boundaries::BoundaryContainer1D) = length(boundaries.orientations) - # Create boundaries container and initialize boundary data in `elements`. 
function init_boundaries(cell_ids, mesh::TreeMesh1D, elements::ElementContainer1D) - # Initialize container - n_boundaries = count_required_boundaries(mesh, cell_ids) - boundaries = BoundaryContainer1D{real(elements), eltype(elements)}( - n_boundaries, nvariables(elements), nnodes(elements)) - - # Connect elements with boundaries - init_boundaries!(boundaries, elements, mesh) - return boundaries + # Initialize container + n_boundaries = count_required_boundaries(mesh, cell_ids) + boundaries = BoundaryContainer1D{real(elements), eltype(elements)}(n_boundaries, + nvariables(elements), + nnodes(elements)) + + # Connect elements with boundaries + init_boundaries!(boundaries, elements, mesh) + return boundaries end # Count the number of boundaries that need to be created function count_required_boundaries(mesh::TreeMesh1D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # No neighbor exists in this direction -> must be a boundary - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # No neighbor exists in this direction -> must be a boundary + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and boundaries function init_boundaries!(boundaries, elements, mesh::TreeMesh1D) - # Reset boundaries count - count = 0 - - # Initialize boundary counts - counts_per_direction = MVector(0, 0) - - # OBS! Iterate over directions first, then over elements, and count boundaries in each direction - # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., - # obviating the need to store the boundary condition to be applied explicitly. 
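The direction-major iteration described in the OBS! comment is what lets boundary conditions be applied by index range alone. A tiny sketch of counting under that ordering; `cells` and the `has_neighbor` predicate are hypothetical stand-ins for the TreeMesh queries used in the real code:

    # Count boundaries grouped by direction (-x, +x in 1D). Looping over
    # directions first yields all -x boundaries, then all +x boundaries, so a
    # boundary's position in the global list already identifies its direction.
    function count_boundaries_per_direction(cells, has_neighbor)
        counts = [0, 0]
        for direction in 1:2, cell in cells
            if !has_neighbor(cell, direction)
                counts[direction] += 1
            end
        end
        return counts
    end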
- # Loop over directions - for direction in eachdirection(mesh.tree) - # Iterate over all elements to find missing neighbors and to connect to boundaries - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] - - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Create boundary - count += 1 - counts_per_direction[direction] += 1 - - # Set neighbor element id - boundaries.neighbor_ids[count] = element - - # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element - if direction == 2 - boundaries.neighbor_sides[count] = 1 - else - boundaries.neighbor_sides[count] = 2 - end - - # Set orientation (x -> 1) - boundaries.orientations[count] = 1 - - # Store node coordinates - enc = elements.node_coordinates - if direction == 1 # -x direction - boundaries.node_coordinates[:, count] .= enc[:, 1, element] - elseif direction == 2 # +x direction - boundaries.node_coordinates[:, count] .= enc[:, end, element] - else - error("should not happen") - end + # Reset boundaries count + count = 0 + + # Initialize boundary counts + counts_per_direction = MVector(0, 0) + + # OBS! Iterate over directions first, then over elements, and count boundaries in each direction + # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., + # obviating the need to store the boundary condition to be applied explicitly. + # Loop over directions + for direction in eachdirection(mesh.tree) + # Iterate over all elements to find missing neighbors and to connect to boundaries + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Create boundary + count += 1 + counts_per_direction[direction] += 1 + + # Set neighbor element id + boundaries.neighbor_ids[count] = element + + # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element + if direction == 2 + boundaries.neighbor_sides[count] = 1 + else + boundaries.neighbor_sides[count] = 2 + end + + # Set orientation (x -> 1) + boundaries.orientations[count] = 1 + + # Store node coordinates + enc = elements.node_coordinates + if direction == 1 # -x direction + boundaries.node_coordinates[:, count] .= enc[:, 1, element] + elseif direction == 2 # +x direction + boundaries.node_coordinates[:, count] .= enc[:, end, element] + else + error("should not happen") + end + end end - end - @assert count == nboundaries(boundaries) ("Actual boundaries count ($count) does not match " * + @assert count==nboundaries(boundaries) ("Actual boundaries count ($count) does not match "* "expectations $(nboundaries(boundaries))") - @assert sum(counts_per_direction) == count + @assert sum(counts_per_direction) == count - boundaries.n_boundaries_per_direction = SVector(counts_per_direction) + boundaries.n_boundaries_per_direction = SVector(counts_per_direction) - return boundaries.n_boundaries_per_direction + return boundaries.n_boundaries_per_direction end - - end # @muladd diff --git a/src/solvers/dgsem_tree/containers_2d.jl 
b/src/solvers/dgsem_tree/containers_2d.jl index 9f874d50464..5cf256d3499 100644 --- a/src/solvers/dgsem_tree/containers_2d.jl +++ b/src/solvers/dgsem_tree/containers_2d.jl @@ -3,17 +3,17 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Container data structure (structure-of-arrays style) for DG elements -mutable struct ElementContainer2D{RealT<:Real, uEltype<:Real} <: AbstractContainer - inverse_jacobian::Vector{RealT} # [elements] - node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] - surface_flux_values::Array{uEltype, 4} # [variables, i, direction, elements] - cell_ids::Vector{Int} # [elements] - # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _surface_flux_values::Vector{uEltype} +mutable struct ElementContainer2D{RealT <: Real, uEltype <: Real} <: AbstractContainer + inverse_jacobian::Vector{RealT} # [elements] + node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] + surface_flux_values::Array{uEltype, 4} # [variables, i, direction, elements] + cell_ids::Vector{Int} # [elements] + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _surface_flux_values::Vector{uEltype} end nvariables(elements::ElementContainer2D) = size(elements.surface_flux_values, 1) @@ -26,51 +26,50 @@ Base.eltype(elements::ElementContainer2D) = eltype(elements.surface_flux_values) # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::ElementContainer2D, capacity) - n_nodes = nnodes(elements) - n_variables = nvariables(elements) - @unpack _node_coordinates, _surface_flux_values, - inverse_jacobian, cell_ids = elements + n_nodes = nnodes(elements) + n_variables = nvariables(elements) + @unpack _node_coordinates, _surface_flux_values, + inverse_jacobian, cell_ids = elements - resize!(inverse_jacobian, capacity) + resize!(inverse_jacobian, capacity) - resize!(_node_coordinates, 2 * n_nodes * n_nodes * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, n_nodes, capacity)) + resize!(_node_coordinates, 2 * n_nodes * n_nodes * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, n_nodes, capacity)) - resize!(_surface_flux_values, n_variables * n_nodes * 2 * 2 * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, 2 * 2, capacity)) + resize!(_surface_flux_values, n_variables * n_nodes * 2 * 2 * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, 2 * 2, capacity)) - resize!(cell_ids, capacity) + resize!(cell_ids, capacity) - return nothing + return nothing end +function ElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function ElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - inverse_jacobian = fill(nan_RealT, capacity) + # Initialize fields with defaults + inverse_jacobian = fill(nan_RealT, capacity) - _node_coordinates = fill(nan_RealT, 2 * n_nodes * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, n_nodes, 
capacity)) + _node_coordinates = fill(nan_RealT, 2 * n_nodes * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, n_nodes, capacity)) - _surface_flux_values = fill(nan_uEltype, n_variables * n_nodes * 2 * 2 * capacity) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, 2 * 2, capacity)) + _surface_flux_values = fill(nan_uEltype, n_variables * n_nodes * 2 * 2 * capacity) + surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, 2 * 2, capacity)) - cell_ids = fill(typemin(Int), capacity) + cell_ids = fill(typemin(Int), capacity) - - return ElementContainer2D{RealT, uEltype}( - inverse_jacobian, node_coordinates, surface_flux_values, cell_ids, - _node_coordinates, _surface_flux_values) + return ElementContainer2D{RealT, uEltype}(inverse_jacobian, node_coordinates, + surface_flux_values, cell_ids, + _node_coordinates, _surface_flux_values) end - # Return number of elements @inline nelements(elements::ElementContainer2D) = length(elements.cell_ids) # TODO: Taal performance, 1:nelements(elements) vs. Base.OneTo(nelements(elements)) @@ -84,70 +83,72 @@ In particular, not the elements themselves are returned. @inline eachelement(elements::ElementContainer2D) = Base.OneTo(nelements(elements)) @inline Base.real(elements::ElementContainer2D) = eltype(elements.node_coordinates) - # Create element container and initialize element data function init_elements(cell_ids, mesh::TreeMesh2D, equations::AbstractEquations{2}, - basis, ::Type{RealT}, ::Type{uEltype}) where {RealT<:Real, uEltype<:Real} - # Initialize container - n_elements = length(cell_ids) - elements = ElementContainer2D{RealT, uEltype}( - n_elements, nvariables(equations), nnodes(basis)) - - init_elements!(elements, cell_ids, mesh, basis) - return elements + basis, ::Type{RealT}, + ::Type{uEltype}) where {RealT <: Real, uEltype <: Real} + # Initialize container + n_elements = length(cell_ids) + elements = ElementContainer2D{RealT, uEltype}(n_elements, nvariables(equations), + nnodes(basis)) + + init_elements!(elements, cell_ids, mesh, basis) + return elements end function init_elements!(elements, cell_ids, mesh::TreeMesh2D, basis) - nodes = get_nodes(basis) - # Compute the length of the 1D reference interval by integrating - # the function with constant value unity on the corresponding - # element data type (using \circ) - reference_length = integrate(one ∘ eltype, nodes, basis) - # Compute the offset of the midpoint of the 1D reference interval - # (its difference from zero) - reference_offset = first(nodes) + reference_length / 2 - - # Store cell ids - elements.cell_ids .= cell_ids - - # Calculate inverse Jacobian and node coordinates - for element in eachelement(elements) - # Get cell id - cell_id = cell_ids[element] - - # Get cell length - dx = length_at_cell(mesh.tree, cell_id) - - # Calculate inverse Jacobian - jacobian = dx / reference_length - elements.inverse_jacobian[element] = inv(jacobian) - - # Calculate node coordinates - # Note that the `tree_coordinates` are the midpoints of the cells. - # Hence, we need to add an offset for `nodes` with a midpoint - # different from zero. 
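The coordinate formula in `init_elements!` maps reference nodes into each physical cell. A minimal sketch, specialized to the standard reference interval [-1, 1] where `reference_length = 2` and `reference_offset = 0`; `cell_midpoint` and `dx` play the roles of `mesh.tree.coordinates[...]` and `length_at_cell`:

    # Physical node = cell midpoint + Jacobian-scaled, midpoint-centered
    # reference node. Specialized to the reference interval [-1, 1].
    function physical_nodes(cell_midpoint::Float64, dx::Float64, nodes)
        jacobian = dx / 2  # reference_length = 2 for [-1, 1]
        return [cell_midpoint + jacobian * xi for xi in nodes]
    end

For instance, `physical_nodes(0.5, 2.0, [-1.0, 0.0, 1.0])` returns `[-0.5, 0.5, 1.5]`, the three Lobatto nodes of a cell of width 2 centered at 0.5.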
- for j in eachnode(basis), i in eachnode(basis) - elements.node_coordinates[1, i, j, element] = ( - mesh.tree.coordinates[1, cell_id] + jacobian * (nodes[i] - reference_offset)) - elements.node_coordinates[2, i, j, element] = ( - mesh.tree.coordinates[2, cell_id] + jacobian * (nodes[j] - reference_offset)) + nodes = get_nodes(basis) + # Compute the length of the 1D reference interval by integrating + # the function with constant value unity on the corresponding + # element data type (using \circ) + reference_length = integrate(one ∘ eltype, nodes, basis) + # Compute the offset of the midpoint of the 1D reference interval + # (its difference from zero) + reference_offset = (first(nodes) + last(nodes)) / 2 + + # Store cell ids + elements.cell_ids .= cell_ids + + # Calculate inverse Jacobian and node coordinates + for element in eachelement(elements) + # Get cell id + cell_id = cell_ids[element] + + # Get cell length + dx = length_at_cell(mesh.tree, cell_id) + + # Calculate inverse Jacobian + jacobian = dx / reference_length + elements.inverse_jacobian[element] = inv(jacobian) + + # Calculate node coordinates + # Note that the `tree_coordinates` are the midpoints of the cells. + # Hence, we need to add an offset for `nodes` with a midpoint + # different from zero. + for j in eachnode(basis), i in eachnode(basis) + elements.node_coordinates[1, i, j, element] = (mesh.tree.coordinates[1, + cell_id] + + jacobian * + (nodes[i] - reference_offset)) + elements.node_coordinates[2, i, j, element] = (mesh.tree.coordinates[2, + cell_id] + + jacobian * + (nodes[j] - reference_offset)) + end end - end - return elements + return elements end - - # Container data structure (structure-of-arrays style) for DG interfaces -mutable struct InterfaceContainer2D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, interfaces] - neighbor_ids::Array{Int, 2} # [leftright, interfaces] - orientations::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct InterfaceContainer2D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 4} # [leftright, variables, i, interfaces] + neighbor_ids::Array{Int, 2} # [leftright, interfaces] + orientations::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(interfaces::InterfaceContainer2D) = size(interfaces.u, 2) @@ -156,169 +157,164 @@ Base.eltype(interfaces::InterfaceContainer2D) = eltype(interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::InterfaceContainer2D, capacity) - n_nodes = nnodes(interfaces) - n_variables = nvariables(interfaces) - @unpack _u, _neighbor_ids, orientations = interfaces + n_nodes = nnodes(interfaces) + n_variables = nvariables(interfaces) + @unpack _u, _neighbor_ids, orientations = interfaces - resize!(_u, 2 * n_variables * n_nodes * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function InterfaceContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function InterfaceContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) - - _neighbor_ids = fill(typemin(Int), 2 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - orientations = fill(typemin(Int), capacity) + _neighbor_ids = fill(typemin(Int), 2 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) + orientations = fill(typemin(Int), capacity) - return InterfaceContainer2D{uEltype}( - u, neighbor_ids, orientations, - _u, _neighbor_ids) + return InterfaceContainer2D{uEltype}(u, neighbor_ids, orientations, + _u, _neighbor_ids) end - # Return number of interfaces @inline ninterfaces(interfaces::InterfaceContainer2D) = length(interfaces.orientations) - # Create interface container and initialize interface data in `elements`. 
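The interface counting below visits only even-numbered directions. With directions numbered (-x, +x, -y, +y) = (1, 2, 3, 4), each interior interface is seen from both of its sides, so restricting to positive directions counts it exactly once, and integer division recovers the axis. A two-line sketch of these conventions:

    # Even directions are the positive ones; integer division yields the axis.
    is_positive_direction(direction::Int) = direction % 2 == 0
    orientation_of(direction::Int) = div(direction, 2)  # x -> 1, y -> 2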
function init_interfaces(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D) - # Initialize container - n_interfaces = count_required_interfaces(mesh, cell_ids) - interfaces = InterfaceContainer2D{eltype(elements)}( - n_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_interfaces!(interfaces, elements, mesh) - return interfaces + # Initialize container + n_interfaces = count_required_interfaces(mesh, cell_ids) + interfaces = InterfaceContainer2D{eltype(elements)}(n_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_interfaces!(interfaces, elements, mesh) + return interfaces end # Count the number of interfaces that need to be created function count_required_interfaces(mesh::TreeMesh2D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # Only count interfaces in positive direction to avoid double counting - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small or at boundary and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on different rank -> create MPI interface instead - if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # Only count interfaces in positive direction to avoid double counting + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on different rank -> create MPI interface instead + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_interfaces!(interfaces, elements, mesh::TreeMesh2D) - # Exit early if there are no interfaces to initialize - if ninterfaces(interfaces) == 0 - return nothing - end - - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end + # Exit early if there are no interfaces to initialize + if ninterfaces(interfaces) == 0 + return nothing + end - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # Only create interfaces in positive direction - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small and thus 
we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on different rank -> create MPI interface instead - if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) - count += 1 - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] - interfaces.neighbor_ids[1, count] = element - - # Set orientation (x -> 1, y -> 2) - interfaces.orientations[count] = div(direction, 2) + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # Only create interfaces in positive direction + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on different rank -> create MPI interface instead + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) + count += 1 + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + interfaces.neighbor_ids[1, count] = element + + # Set orientation (x -> 1, y -> 2) + interfaces.orientations[count] = div(direction, 2) + end end - end - @assert count == ninterfaces(interfaces) ("Actual interface count ($count) does not match " * + @assert count==ninterfaces(interfaces) ("Actual interface count ($count) does not match "* "expectations $(ninterfaces(interfaces))") end - - # Container data structure (structure-of-arrays style) for DG boundaries -mutable struct BoundaryContainer2D{RealT<:Real, uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, boundaries] - neighbor_ids::Vector{Int} # [boundaries] - orientations::Vector{Int} # [boundaries] - neighbor_sides::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 3} # [orientation, i, elements] - n_boundaries_per_direction::SVector{4, Int} # [direction] - # internal `resize!`able storage - _u::Vector{uEltype} - _node_coordinates::Vector{RealT} +mutable struct BoundaryContainer2D{RealT <: Real, uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 4} # [leftright, variables, i, boundaries] + neighbor_ids::Vector{Int} # [boundaries] + orientations::Vector{Int} # [boundaries] + neighbor_sides::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 3} # [orientation, i, elements] + n_boundaries_per_direction::SVector{4, Int} # [direction] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_coordinates::Vector{RealT} end nvariables(boundaries::BoundaryContainer2D) = size(boundaries.u, 2) @@ -327,180 +323,179 @@ Base.eltype(boundaries::BoundaryContainer2D) = eltype(boundaries.u) # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::BoundaryContainer2D, capacity) - n_nodes = nnodes(boundaries) - n_variables = nvariables(boundaries) - @unpack _u, _node_coordinates, - neighbor_ids, orientations, neighbor_sides = boundaries + n_nodes = nnodes(boundaries) + n_variables = nvariables(boundaries) + @unpack _u, _node_coordinates, + neighbor_ids, orientations, neighbor_sides = boundaries - resize!(_u, 2 * n_variables * n_nodes * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - resize!(_node_coordinates, 2 * n_nodes * capacity) - boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, capacity)) + resize!(_node_coordinates, 2 * n_nodes * capacity) + boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(neighbor_sides, capacity) + resize!(neighbor_sides, capacity) - return nothing + return nothing end +function BoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function BoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan_uEltype, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u = fill(nan_uEltype, 2 * n_variables * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - neighbor_ids = fill(typemin(Int), capacity) + neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - neighbor_sides = fill(typemin(Int), capacity) + neighbor_sides = fill(typemin(Int), capacity) - _node_coordinates = fill(nan_RealT, 2 * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 2 * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, capacity)) - n_boundaries_per_direction = SVector(0, 0, 0, 0) + n_boundaries_per_direction = SVector(0, 0, 0, 0) - return BoundaryContainer2D{RealT, uEltype}( - u, neighbor_ids, orientations, neighbor_sides, - node_coordinates, n_boundaries_per_direction, - _u, _node_coordinates) + return BoundaryContainer2D{RealT, uEltype}(u, neighbor_ids, orientations, + neighbor_sides, + node_coordinates, + n_boundaries_per_direction, + _u, _node_coordinates) end - # Return number of boundaries @inline nboundaries(boundaries::BoundaryContainer2D) = length(boundaries.orientations) - # Create boundaries container and initialize boundary data in `elements`. 
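Reviewer note: the hunks above re-indent, but do not change, the storage idiom used by every container in this file: each container owns a flat `resize!`able `Vector` (e.g. `_u`, `_node_coordinates`) and exposes a multi-dimensional `unsafe_wrap`ped view of it that must be rebuilt after every `resize!`. A minimal, self-contained sketch of that idiom follows; `MiniContainer` and its field names are illustrative, not Trixi.jl API.

```julia
# Minimal sketch of the resize-and-rewrap idiom (illustrative only).
mutable struct MiniContainer{T <: Real}
    u::Array{T, 2} # [variables, elements], a view into _u
    # internal `resize!`able storage
    _u::Vector{T}
end

function MiniContainer{T}(n_variables, capacity) where {T <: Real}
    _u = fill(convert(T, NaN), n_variables * capacity) # assumes a float eltype
    u = unsafe_wrap(Array, pointer(_u), (n_variables, capacity))
    return MiniContainer{T}(u, _u)
end

function Base.resize!(c::MiniContainer, capacity)
    n_variables = size(c.u, 1)
    resize!(c._u, n_variables * capacity)
    # `resize!` may reallocate, so the wrapped view must be rebuilt from the
    # (possibly new) pointer.
    c.u = unsafe_wrap(Array, pointer(c._u), (n_variables, capacity))
    return nothing
end
```

Keeping `_u` as a field of the struct is what keeps the underlying memory alive; the `unsafe_wrap`ped view alone would not protect it from garbage collection.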
function init_boundaries(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D) - # Initialize container - n_boundaries = count_required_boundaries(mesh, cell_ids) - boundaries = BoundaryContainer2D{real(elements), eltype(elements)}( - n_boundaries, nvariables(elements), nnodes(elements)) - - # Connect elements with boundaries - init_boundaries!(boundaries, elements, mesh) - return boundaries + # Initialize container + n_boundaries = count_required_boundaries(mesh, cell_ids) + boundaries = BoundaryContainer2D{real(elements), eltype(elements)}(n_boundaries, + nvariables(elements), + nnodes(elements)) + + # Connect elements with boundaries + init_boundaries!(boundaries, elements, mesh) + return boundaries end # Count the number of boundaries that need to be created function count_required_boundaries(mesh::TreeMesh2D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # No neighbor exists in this direction -> must be a boundary - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # No neighbor exists in this direction -> must be a boundary + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and boundaries function init_boundaries!(boundaries, elements, mesh::TreeMesh2D) - # Exit early if there are no boundaries to initialize - if nboundaries(boundaries) == 0 - return nothing - end + # Exit early if there are no boundaries to initialize + if nboundaries(boundaries) == 0 + return nothing + end - # Reset boundaries count - count = 0 + # Reset boundaries count + count = 0 - # Initialize boundary counts - counts_per_direction = MVector(0, 0, 0, 0) + # Initialize boundary counts + counts_per_direction = MVector(0, 0, 0, 0) - # OBS! Iterate over directions first, then over elements, and count boundaries in each direction - # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., - # obviating the need to store the boundary condition to be applied explicitly. 
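Reviewer note: the `direction` tests scattered through these hunks (`direction % 2 == 1`, `iseven(direction)`, `direction in (1, 2)`, `div(direction, 2)`) all decode the same convention for the four 2D tree directions: 1 -> -x, 2 -> +x, 3 -> -y, 4 -> +y. A tiny sketch of that decoding, with hypothetical helper names:

```julia
# Direction encoding used in this file: 1 -> -x, 2 -> +x, 3 -> -y, 4 -> +y.
# Helper names are hypothetical, for illustration only.
is_positive_direction(direction) = iseven(direction)
orientation(direction) = div(direction + 1, 2) # x -> 1, y -> 2

for direction in 1:4
    sign = is_positive_direction(direction) ? '+' : '-'
    axis = orientation(direction) == 1 ? 'x' : 'y'
    println(direction, " -> ", sign, axis) # prints 1 -> -x, ..., 4 -> +y
end
```

For interfaces, only even directions survive the early `continue`, so the `div(direction, 2)` used there agrees with this general mapping.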
- # Loop over directions - for direction in eachdirection(mesh.tree) - # Iterate over all elements to find missing neighbors and to connect to boundaries - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] - - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Create boundary - count += 1 - counts_per_direction[direction] += 1 - - # Set neighbor element id - boundaries.neighbor_ids[count] = element - - # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element - if iseven(direction) - boundaries.neighbor_sides[count] = 1 - else - boundaries.neighbor_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) - boundaries.orientations[count] = 1 - else - boundaries.orientations[count] = 2 - end - - # Store node coordinates - enc = elements.node_coordinates - if direction == 1 # -x direction - boundaries.node_coordinates[:, :, count] .= enc[:, 1, :, element] - elseif direction == 2 # +x direction - boundaries.node_coordinates[:, :, count] .= enc[:, end, :, element] - elseif direction == 3 # -y direction - boundaries.node_coordinates[:, :, count] .= enc[:, :, 1, element] - elseif direction == 4 # +y direction - boundaries.node_coordinates[:, :, count] .= enc[:, :, end, element] - else - error("should not happen") - end + # OBS! Iterate over directions first, then over elements, and count boundaries in each direction + # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., + # obviating the need to store the boundary condition to be applied explicitly. 
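Reviewer note: the OBS! comment above states the key invariant of this function: because the direction loop is outermost, boundaries end up stored contiguously as all -x, then +x, then -y, then +y, so the counts in `n_boundaries_per_direction` suffice to recover each group. A small sketch under that assumption (plain `Vector` instead of `SVector`, hypothetical helper names):

```julia
# Recover per-direction index ranges from the stored counts; illustrative only.
n_boundaries_per_direction = [3, 1, 0, 2] # e.g. 3 at -x, 1 at +x, 0 at -y, 2 at +y
offsets = cumsum(n_boundaries_per_direction)
first_index(direction) = direction == 1 ? 1 : offsets[direction - 1] + 1
boundary_range(direction) = first_index(direction):offsets[direction]

boundary_range(1) # 1:3 -> the -x boundaries
boundary_range(4) # 5:6 -> the +y boundaries
```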
+ # Loop over directions + for direction in eachdirection(mesh.tree) + # Iterate over all elements to find missing neighbors and to connect to boundaries + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Create boundary + count += 1 + counts_per_direction[direction] += 1 + + # Set neighbor element id + boundaries.neighbor_ids[count] = element + + # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element + if iseven(direction) + boundaries.neighbor_sides[count] = 1 + else + boundaries.neighbor_sides[count] = 2 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) + boundaries.orientations[count] = 1 + else + boundaries.orientations[count] = 2 + end + + # Store node coordinates + enc = elements.node_coordinates + if direction == 1 # -x direction + boundaries.node_coordinates[:, :, count] .= enc[:, 1, :, element] + elseif direction == 2 # +x direction + boundaries.node_coordinates[:, :, count] .= enc[:, end, :, element] + elseif direction == 3 # -y direction + boundaries.node_coordinates[:, :, count] .= enc[:, :, 1, element] + elseif direction == 4 # +y direction + boundaries.node_coordinates[:, :, count] .= enc[:, :, end, element] + else + error("should not happen") + end + end end - end - @assert count == nboundaries(boundaries) ("Actual boundaries count ($count) does not match " * + @assert count==nboundaries(boundaries) ("Actual boundaries count ($count) does not match "* "expectations $(nboundaries(boundaries))") - @assert sum(counts_per_direction) == count + @assert sum(counts_per_direction) == count - boundaries.n_boundaries_per_direction = SVector(counts_per_direction) + boundaries.n_boundaries_per_direction = SVector(counts_per_direction) - return boundaries.n_boundaries_per_direction + return boundaries.n_boundaries_per_direction end - - # Container data structure (structure-of-arrays style) for DG L2 mortars # Positions/directions for orientations = 1, large_sides = 2: # mortar is orthogonal to x-axis, large side is in positive coordinate direction wrt mortar @@ -511,17 +506,17 @@ end # | | # lower = 1 | | # | | -mutable struct L2MortarContainer2D{uEltype<:Real} <: AbstractContainer - u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] - u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] - neighbor_ids::Array{Int, 2} # [position, mortars] - # Large sides: left -> 1, right -> 2 - large_sides::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] - # internal `resize!`able storage - _u_upper::Vector{uEltype} - _u_lower::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct L2MortarContainer2D{uEltype <: Real} <: AbstractContainer + u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] + u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] + neighbor_ids::Array{Int, 2} # [position, mortars] + # Large sides: left -> 1, right -> 2 + large_sides::Vector{Int} # [mortars] + orientations::Vector{Int} # [mortars] + # internal `resize!`able storage + _u_upper::Vector{uEltype} + _u_lower::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(mortars::L2MortarContainer2D) = size(mortars.u_upper, 2) @@ -530,249 +525,251 @@ Base.eltype(mortars::L2MortarContainer2D) = 
eltype(mortars.u_upper) # See explanation of Base.resize! for the element container function Base.resize!(mortars::L2MortarContainer2D, capacity) - n_nodes = nnodes(mortars) - n_variables = nvariables(mortars) - @unpack _u_upper, _u_lower, _neighbor_ids, - large_sides, orientations = mortars + n_nodes = nnodes(mortars) + n_variables = nvariables(mortars) + @unpack _u_upper, _u_lower, _neighbor_ids, + large_sides, orientations = mortars - resize!(_u_upper, 2 * n_variables * n_nodes * capacity) - mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + resize!(_u_upper, 2 * n_variables * n_nodes * capacity) + mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - resize!(_u_lower, 2 * n_variables * n_nodes * capacity) - mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + resize!(_u_lower, 2 * n_variables * n_nodes * capacity) + mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - resize!(_neighbor_ids, 3 * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (3, capacity)) + resize!(_neighbor_ids, 3 * capacity) + mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (3, capacity)) - resize!(large_sides, capacity) + resize!(large_sides, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function L2MortarContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function L2MortarContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) - u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) + u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) - u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) + u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - _neighbor_ids = fill(typemin(Int), 3 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (3, capacity)) + _neighbor_ids = fill(typemin(Int), 3 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (3, capacity)) - large_sides = fill(typemin(Int), capacity) + large_sides = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - return L2MortarContainer2D{uEltype}( - u_upper, u_lower, neighbor_ids, large_sides, orientations, - _u_upper, _u_lower, _neighbor_ids) + return L2MortarContainer2D{uEltype}(u_upper, u_lower, neighbor_ids, large_sides, + orientations, + _u_upper, _u_lower, _neighbor_ids) end - # Return number of L2 mortars @inline nmortars(l2mortars::L2MortarContainer2D) = length(l2mortars.orientations) - # Allow printing container contents function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer2D) - @nospecialize c # reduce precompilation time - - println(io, '*'^20) - for idx in CartesianIndices(c.u_upper) - println(io, "c.u_upper[$idx] = $(c.u_upper[idx])") - end - for idx in 
CartesianIndices(c.u_lower) - println(io, "c.u_lower[$idx] = $(c.u_lower[idx])") - end - println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") - println(io, "c.large_sides = $(c.large_sides)") - println(io, "c.orientations = $(c.orientations)") - print(io, '*'^20) -end + @nospecialize c # reduce precompilation time + println(io, '*'^20) + for idx in CartesianIndices(c.u_upper) + println(io, "c.u_upper[$idx] = $(c.u_upper[idx])") + end + for idx in CartesianIndices(c.u_lower) + println(io, "c.u_lower[$idx] = $(c.u_lower[idx])") + end + println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") + println(io, "c.large_sides = $(c.large_sides)") + println(io, "c.orientations = $(c.orientations)") + print(io, '*'^20) +end # Create mortar container and initialize mortar data in `elements`. function init_mortars(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D, ::LobattoLegendreMortarL2) - # Initialize containers - n_mortars = count_required_mortars(mesh, cell_ids) - mortars = L2MortarContainer2D{eltype(elements)}( - n_mortars, nvariables(elements), nnodes(elements)) - - # Connect elements with mortars - init_mortars!(mortars, elements, mesh) - return mortars + # Initialize containers + n_mortars = count_required_mortars(mesh, cell_ids) + mortars = L2MortarContainer2D{eltype(elements)}(n_mortars, nvariables(elements), + nnodes(elements)) + + # Connect elements with mortars + init_mortars!(mortars, elements, mesh) + return mortars end # Count the number of mortars that need to be created function count_required_mortars(mesh::TreeMesh2D, cell_ids) - count = 0 - - # Iterate over all cells and count mortars from perspective of coarse cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_id) - continue - end - - # Skip if one of the small cells is on different rank -> create mpi mortar instead - # (the coarse cell is always on the local rank) - if mpi_isparallel() - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - else # direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_id] + count = 0 + + # Iterate over all cells and count mortars from perspective of coarse cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_id) + continue + end + + # Skip if one of the small cells is on different rank -> 
create mpi mortar instead + # (the coarse cell is always on the local rank) + if mpi_isparallel() + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if any(cell -> !is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + count += 1 end - small_cell_ids = (lower_cell_id, upper_cell_id) - if any(cell -> !is_own_cell(mesh.tree, cell), small_cell_ids) - continue - end - end - - count +=1 end - end - return count + return count end # Initialize connectivity between elements and mortars function init_mortars!(mortars, elements, mesh::TreeMesh2D) - # Exit early if there are no mortars to initialize - if nmortars(mortars) == 0 - return nothing - end - - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end + # Exit early if there are no mortars to initialize + if nmortars(mortars) == 0 + return nothing + end - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if one of the small cells is on different rank -> create mpi mortar instead - # (the coarse cell is always on the local rank) - if mpi_isparallel() - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - else # direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - end - small_cell_ids = (lower_cell_id, upper_cell_id) - if any(cell -> !is_own_cell(mesh.tree, cell), 
small_cell_ids) - continue + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if one of the small cells is on different rank -> create mpi mortar instead + # (the coarse cell is always on the local rank) + if mpi_isparallel() + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if any(cell -> !is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + # Create mortar between elements: + # 1 -> small element in negative coordinate direction + # 2 -> small element in positive coordinate direction + # 3 -> large element + count += 1 + mortars.neighbor_ids[3, count] = element + if direction == 1 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + elseif direction == 2 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + elseif direction == 3 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + elseif direction == 4 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + else + error("should not happen") + end + + # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side + if iseven(direction) + mortars.large_sides[count] = 1 + else + mortars.large_sides[count] = 2 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) + mortars.orientations[count] = 1 + else + mortars.orientations[count] = 2 + end end - end - - - # Create mortar between elements: - # 1 -> small element in negative coordinate direction - # 2 -> small element in positive coordinate direction - # 3 -> large element - count += 1 - mortars.neighbor_ids[3, count] = element - if direction == 1 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - elseif direction == 2 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - 
mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - elseif direction == 3 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - elseif direction == 4 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - else - error("should not happen") - end - - # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side - if iseven(direction) - mortars.large_sides[count] = 1 - else - mortars.large_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) - mortars.orientations[count] = 1 - else - mortars.orientations[count] = 2 - end end - end - @assert count == nmortars(mortars) ("Actual mortar count ($count) does not match " * + @assert count==nmortars(mortars) ("Actual mortar count ($count) does not match "* "expectations $(nmortars(mortars))") end - - # Container data structure (structure-of-arrays style) for DG MPI interfaces -mutable struct MPIInterfaceContainer2D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, interfaces] - local_neighbor_ids::Vector{Int} # [interfaces] - orientations::Vector{Int} # [interfaces] - remote_sides::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} +mutable struct MPIInterfaceContainer2D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 4} # [leftright, variables, i, interfaces] + local_neighbor_ids::Vector{Int} # [interfaces] + orientations::Vector{Int} # [interfaces] + remote_sides::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} end nvariables(mpi_interfaces::MPIInterfaceContainer2D) = size(mpi_interfaces.u, 2) @@ -781,154 +778,154 @@ Base.eltype(mpi_interfaces::MPIInterfaceContainer2D) = eltype(mpi_interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(mpi_interfaces::MPIInterfaceContainer2D, capacity) - n_nodes = nnodes(mpi_interfaces) - n_variables = nvariables(mpi_interfaces) - @unpack _u, local_neighbor_ids, orientations, remote_sides = mpi_interfaces + n_nodes = nnodes(mpi_interfaces) + n_variables = nvariables(mpi_interfaces) + @unpack _u, local_neighbor_ids, orientations, remote_sides = mpi_interfaces - resize!(_u, 2 * n_variables * n_nodes * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * capacity) + mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - resize!(local_neighbor_ids, capacity) + resize!(local_neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(remote_sides, capacity) + resize!(remote_sides, capacity) - return nothing + return nothing end +function MPIInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function MPIInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - local_neighbor_ids = fill(typemin(Int), capacity) + local_neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - remote_sides = fill(typemin(Int), capacity) + remote_sides = fill(typemin(Int), capacity) - return MPIInterfaceContainer2D{uEltype}( - u, local_neighbor_ids, orientations, remote_sides, - _u) + return MPIInterfaceContainer2D{uEltype}(u, local_neighbor_ids, orientations, + remote_sides, + _u) end - # TODO: Taal, rename to ninterfaces? # Return number of interfaces -nmpiinterfaces(mpi_interfaces::MPIInterfaceContainer2D) = length(mpi_interfaces.orientations) - +function nmpiinterfaces(mpi_interfaces::MPIInterfaceContainer2D) + length(mpi_interfaces.orientations) +end # Create MPI interface container and initialize MPI interface data in `elements`. 
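Reviewer note: besides the re-indentation, several short-form definitions in these hunks (`nmpiinterfaces` here, `nmpimortars` further below) are expanded into `function ... end` blocks. The two forms are semantically identical; the rewrite is presumably triggered by the SciML style's 92-character line margin, which the one-liners exceed:

```julia
# Short form (exceeds the 92-character margin with these argument names):
nmpiinterfaces(mpi_interfaces::MPIInterfaceContainer2D) = length(mpi_interfaces.orientations)

# Long form produced by the formatter; the last expression is returned
# implicitly, so behavior is unchanged:
function nmpiinterfaces(mpi_interfaces::MPIInterfaceContainer2D)
    length(mpi_interfaces.orientations)
end
```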
function init_mpi_interfaces(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D) - # Initialize container - n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) - mpi_interfaces = MPIInterfaceContainer2D{eltype(elements)}( - n_mpi_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_mpi_interfaces!(mpi_interfaces, elements, mesh) - return mpi_interfaces + # Initialize container + n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) + mpi_interfaces = MPIInterfaceContainer2D{eltype(elements)}(n_mpi_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_mpi_interfaces!(mpi_interfaces, elements, mesh) + return mpi_interfaces end # Count the number of MPI interfaces that need to be created function count_required_mpi_interfaces(mesh::TreeMesh2D, cell_ids) - # No MPI interfaces needed if MPI is not used - if !mpi_isparallel() - return 0 - end + # No MPI interfaces needed if MPI is not used + if !mpi_isparallel() + return 0 + end - count = 0 + count = 0 - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, current cell is small or at boundary and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on this rank -> create regular interface instead - if is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - count += 1 + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this rank -> create regular interface instead + if is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_mpi_interfaces!(mpi_interfaces, elements, mesh::TreeMesh2D) - # Exit early if there are no MPI interfaces to initialize - if nmpiinterfaces(mpi_interfaces) == 0 - return nothing - end - - # Reset interface count - count = 0 + # Exit early if there are no MPI interfaces to initialize + if nmpiinterfaces(mpi_interfaces) == 0 + return nothing + end - # Iterate over all elements to find neighbors and to connect via mpi_interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # If no neighbor exists, current cell is small and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on this MPI rank -> create regular interface instead - if is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - # Create interface between elements - count += 1 - mpi_interfaces.local_neighbor_ids[count] = element - - if 
iseven(direction) # element is "left" of interface, remote cell is "right" of interface - mpi_interfaces.remote_sides[count] = 2 - else - mpi_interfaces.remote_sides[count] = 1 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) # x-direction - mpi_interfaces.orientations[count] = 1 - else # y-direction - mpi_interfaces.orientations[count] = 2 - end + # Iterate over all elements to find neighbors and to connect via mpi_interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this MPI rank -> create regular interface instead + if is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements + count += 1 + mpi_interfaces.local_neighbor_ids[count] = element + + if iseven(direction) # element is "left" of interface, remote cell is "right" of interface + mpi_interfaces.remote_sides[count] = 2 + else + mpi_interfaces.remote_sides[count] = 1 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) # x-direction + mpi_interfaces.orientations[count] = 1 + else # y-direction + mpi_interfaces.orientations[count] = 2 + end + end end - end - @assert count == nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " - * "expectations $(nmpiinterfaces(mpi_interfaces))") + @assert count==nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " + *"expectations $(nmpiinterfaces(mpi_interfaces))") end - # Container data structure (structure-of-arrays style) for DG L2 mortars # Positions/directions for orientations = 1, large_sides = 2: # mortar is orthogonal to x-axis, large side is in positive coordinate direction wrt mortar @@ -939,17 +936,17 @@ end # | | # lower = 1 | | # | | -mutable struct MPIL2MortarContainer2D{uEltype<:Real} <: AbstractContainer - u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] - u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] - local_neighbor_ids::Vector{Vector{Int}} # [mortars] - local_neighbor_positions::Vector{Vector{Int}} # [mortars] - # Large sides: left -> 1, right -> 2 - large_sides::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] - # internal `resize!`able storage - _u_upper::Vector{uEltype} - _u_lower::Vector{uEltype} +mutable struct MPIL2MortarContainer2D{uEltype <: Real} <: AbstractContainer + u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] + u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] + local_neighbor_ids::Vector{Vector{Int}} # [mortars] + local_neighbor_positions::Vector{Vector{Int}} # [mortars] + # Large sides: left -> 1, right -> 2 + large_sides::Vector{Int} # [mortars] + orientations::Vector{Int} # [mortars] + # internal `resize!`able storage + _u_upper::Vector{uEltype} + _u_lower::Vector{uEltype} end nvariables(mpi_mortars::MPIL2MortarContainer2D) = size(mpi_mortars.u_upper, 2) @@ -958,303 +955,303 @@ Base.eltype(mpi_mortars::MPIL2MortarContainer2D) = eltype(mpi_mortars.u_upper) # See explanation of Base.resize! 
for the element container function Base.resize!(mpi_mortars::MPIL2MortarContainer2D, capacity) - n_nodes = nnodes(mpi_mortars) - n_variables = nvariables(mpi_mortars) - @unpack _u_upper, _u_lower, local_neighbor_ids, local_neighbor_positions, - large_sides, orientations = mpi_mortars + n_nodes = nnodes(mpi_mortars) + n_variables = nvariables(mpi_mortars) + @unpack _u_upper, _u_lower, local_neighbor_ids, local_neighbor_positions, + large_sides, orientations = mpi_mortars - resize!(_u_upper, 2 * n_variables * n_nodes * capacity) - mpi_mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + resize!(_u_upper, 2 * n_variables * n_nodes * capacity) + mpi_mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - resize!(_u_lower, 2 * n_variables * n_nodes * capacity) - mpi_mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + resize!(_u_lower, 2 * n_variables * n_nodes * capacity) + mpi_mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - resize!(local_neighbor_ids, capacity) - resize!(local_neighbor_positions, capacity) + resize!(local_neighbor_ids, capacity) + resize!(local_neighbor_positions, capacity) - resize!(large_sides, capacity) + resize!(large_sides, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function MPIL2MortarContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function MPIL2MortarContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) - u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) + u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) - u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) + u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - local_neighbor_ids = fill(Vector{Int}(), capacity) - local_neighbor_positions = fill(Vector{Int}(), capacity) + local_neighbor_ids = fill(Vector{Int}(), capacity) + local_neighbor_positions = fill(Vector{Int}(), capacity) - large_sides = fill(typemin(Int), capacity) + large_sides = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - return MPIL2MortarContainer2D{uEltype}( - u_upper, u_lower, local_neighbor_ids, local_neighbor_positions, large_sides, orientations, - _u_upper, _u_lower) + return MPIL2MortarContainer2D{uEltype}(u_upper, u_lower, local_neighbor_ids, + local_neighbor_positions, large_sides, + orientations, + _u_upper, _u_lower) end - # Return number of L2 mortars -@inline nmpimortars(mpi_l2mortars::MPIL2MortarContainer2D) = length(mpi_l2mortars.orientations) - +@inline function nmpimortars(mpi_l2mortars::MPIL2MortarContainer2D) + length(mpi_l2mortars.orientations) +end # Create MPI mortar container and initialize MPI mortar data in `elements`. 
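Reviewer note: unrelated to the re-indentation, the `fill(Vector{Int}(), capacity)` initialization in the constructor below is worth flagging for future readers: `fill` repeats a reference to one and the same empty vector, so all slots alias each other until they are reassigned. That is safe here because `init_mpi_mortars!` assigns each used slot a freshly built vector instead of mutating in place. A short demonstration:

```julia
# `fill` repeats a reference to the *same* object:
v = fill(Vector{Int}(), 3)
push!(v[1], 42)
v # [[42], [42], [42]] -- every slot changed; they all alias one vector

# Reassignment (what `init_mpi_mortars!` does) breaks the aliasing for that slot:
v[2] = [7, 8]
v[1] === v[3] # true: untouched slots still share storage
v[2]          # [7, 8]

# An alias-free alternative, if in-place mutation were ever needed:
w = [Vector{Int}() for _ in 1:3]
```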
function init_mpi_mortars(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D, ::LobattoLegendreMortarL2) - # Initialize containers - n_mpi_mortars = count_required_mpi_mortars(mesh, cell_ids) - mpi_mortars = MPIL2MortarContainer2D{eltype(elements)}( - n_mpi_mortars, nvariables(elements), nnodes(elements)) - - # Connect elements with mortars - init_mpi_mortars!(mpi_mortars, elements, mesh) - return mpi_mortars + # Initialize containers + n_mpi_mortars = count_required_mpi_mortars(mesh, cell_ids) + mpi_mortars = MPIL2MortarContainer2D{eltype(elements)}(n_mpi_mortars, + nvariables(elements), + nnodes(elements)) + + # Connect elements with mortars + init_mpi_mortars!(mpi_mortars, elements, mesh) + return mpi_mortars end # Count the number of MPI mortars that need to be created function count_required_mpi_mortars(mesh::TreeMesh2D, cell_ids) - # No MPI mortars needed if MPI is not used - if !mpi_isparallel() - return 0 - end - - count = 0 - - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary - if !has_neighbor(mesh.tree, cell_id, direction) - # If no large neighbor exists, cell is at boundary -> do nothing - if !has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if the large neighbor is on the same rank to prevent double counting - parent_id = mesh.tree.parent_ids[cell_id] - large_cell_id = mesh.tree.neighbor_ids[direction, parent_id] - if is_own_cell(mesh.tree, large_cell_id) - continue - end - - # Current cell is small with large neighbor on a different rank, find the other - # small cell - if direction == 1 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, parent_id] - upper_cell_id = mesh.tree.child_ids[3, parent_id] - elseif direction == 2 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, parent_id] - upper_cell_id = mesh.tree.child_ids[4, parent_id] - elseif direction == 3 # small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, parent_id] - upper_cell_id = mesh.tree.child_ids[2, parent_id] - else # direction == 4, small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, parent_id] - upper_cell_id = mesh.tree.child_ids[4, parent_id] - end - - if cell_id == lower_cell_id - sibling_id = upper_cell_id - elseif cell_id == upper_cell_id - sibling_id = lower_cell_id - else - error("should not happen") - end - - # Skip if the other small cell is on the same rank and its id is smaller than the current - # cell id to prevent double counting - if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id - continue - end - else # Cell has a neighbor - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_id) - continue - end + # No MPI mortars needed if MPI is not used + if !mpi_isparallel() + return 0 + end - # Skip if both small cells are on this rank -> create regular mortar instead - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, 
neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - else # direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_id] - end - small_cell_ids = (lower_cell_id, upper_cell_id) - if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) - continue + count = 0 + + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary + if !has_neighbor(mesh.tree, cell_id, direction) + # If no large neighbor exists, cell is at boundary -> do nothing + if !has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if the large neighbor is on the same rank to prevent double counting + parent_id = mesh.tree.parent_ids[cell_id] + large_cell_id = mesh.tree.neighbor_ids[direction, parent_id] + if is_own_cell(mesh.tree, large_cell_id) + continue + end + + # Current cell is small with large neighbor on a different rank, find the other + # small cell + if direction == 1 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, parent_id] + upper_cell_id = mesh.tree.child_ids[3, parent_id] + elseif direction == 2 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, parent_id] + upper_cell_id = mesh.tree.child_ids[4, parent_id] + elseif direction == 3 # small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, parent_id] + upper_cell_id = mesh.tree.child_ids[2, parent_id] + else # direction == 4, small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, parent_id] + upper_cell_id = mesh.tree.child_ids[4, parent_id] + end + + if cell_id == lower_cell_id + sibling_id = upper_cell_id + elseif cell_id == upper_cell_id + sibling_id = lower_cell_id + else + error("should not happen") + end + + # Skip if the other small cell is on the same rank and its id is smaller than the current + # cell id to prevent double counting + if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id + continue + end + else # Cell has a neighbor + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_id) + continue + end + + # Skip if both small cells are on this rank -> create regular mortar instead + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + count += 1 end - end - - count += 1 end - end - return count + return count end # Initialize connectivity between elements and mortars function init_mpi_mortars!(mpi_mortars, elements, mesh::TreeMesh2D) - # Exit early if there are no MPI mortars to initialize - if 
nmpimortars(mpi_mortars) == 0 - return nothing - end - - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset mortar count - count = 0 - - # Iterate over all elements to find neighbors and to connect via mortars - for element in eachelement(elements) - cell_id = elements.cell_ids[element] - - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary - if !has_neighbor(mesh.tree, cell_id, direction) - # If no large neighbor exists, cell is at boundary -> do nothing - if !has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if the large neighbor is on the same rank -> will be handled in another iteration - parent_cell_id = mesh.tree.parent_ids[cell_id] - large_cell_id = mesh.tree.neighbor_ids[direction, parent_cell_id] - if is_own_cell(mesh.tree, large_cell_id) - continue - end - - # Current cell is small with large neighbor on a different rank, find the other - # small cell - if direction == 1 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[3, parent_cell_id] - elseif direction == 2 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] - elseif direction == 3 # small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[2, parent_cell_id] - else # direction == 4, small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] - end - - if cell_id == lower_cell_id - sibling_id = upper_cell_id - elseif cell_id == upper_cell_id - sibling_id = lower_cell_id - else - error("should not happen") - end + # Exit early if there are no MPI mortars to initialize + if nmpimortars(mpi_mortars) == 0 + return nothing + end - # Skip if the other small cell is on the same rank and its id is smaller than the current - # cell id to prevent double counting - if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id - continue - end - else # Cell has a neighbor - large_cell_id = cell_id # save explicitly for later processing + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_cell_id) - continue - end + # Reset mortar count + count = 0 - # Skip if both small cells are on this rank -> create regular mortar instead - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - else # 
direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - end - small_cell_ids = (lower_cell_id, upper_cell_id) - if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) - continue + # Iterate over all elements to find neighbors and to connect via mortars + for element in eachelement(elements) + cell_id = elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary + if !has_neighbor(mesh.tree, cell_id, direction) + # If no large neighbor exists, cell is at boundary -> do nothing + if !has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if the large neighbor is on the same rank -> will be handled in another iteration + parent_cell_id = mesh.tree.parent_ids[cell_id] + large_cell_id = mesh.tree.neighbor_ids[direction, parent_cell_id] + if is_own_cell(mesh.tree, large_cell_id) + continue + end + + # Current cell is small with large neighbor on a different rank, find the other + # small cell + if direction == 1 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[3, parent_cell_id] + elseif direction == 2 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] + elseif direction == 3 # small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[2, parent_cell_id] + else # direction == 4, small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] + end + + if cell_id == lower_cell_id + sibling_id = upper_cell_id + elseif cell_id == upper_cell_id + sibling_id = lower_cell_id + else + error("should not happen") + end + + # Skip if the other small cell is on the same rank and its id is smaller than the current + # cell id to prevent double counting + if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id + continue + end + else # Cell has a neighbor + large_cell_id = cell_id # save explicitly for later processing + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if both small cells are on this rank -> create regular mortar instead + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + # Create mortar between elements: + # 1 -> small 
element in negative coordinate direction + # 2 -> small element in positive coordinate direction + # 3 -> large element + count += 1 + + local_neighbor_ids = Vector{Int}() + local_neighbor_positions = Vector{Int}() + if is_own_cell(mesh.tree, lower_cell_id) + push!(local_neighbor_ids, c2e[lower_cell_id]) + push!(local_neighbor_positions, 1) + end + if is_own_cell(mesh.tree, upper_cell_id) + push!(local_neighbor_ids, c2e[upper_cell_id]) + push!(local_neighbor_positions, 2) + end + if is_own_cell(mesh.tree, large_cell_id) + push!(local_neighbor_ids, c2e[large_cell_id]) + push!(local_neighbor_positions, 3) + end + + mpi_mortars.local_neighbor_ids[count] = local_neighbor_ids + mpi_mortars.local_neighbor_positions[count] = local_neighbor_positions + + # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side + # To prevent double counting, the mortars are always identified from the point of view of + # a large cell, if it is on this rank. In that case, direction points towards the small cells. + # If the large cell is not on this rank, the point of view of a small cell is taken instead, + # hence direction points towards the large cell in this case. + if iseven(direction) + mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? + 1 : 2 + else + mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? + 2 : 1 + end + + # Set orientation (1, 2 -> x; 3, 4 -> y) + if direction in (1, 2) + mpi_mortars.orientations[count] = 1 + else + mpi_mortars.orientations[count] = 2 + end end - end - - # Create mortar between elements: - # 1 -> small element in negative coordinate direction - # 2 -> small element in positive coordinate direction - # 3 -> large element - count += 1 - - local_neighbor_ids = Vector{Int}() - local_neighbor_positions = Vector{Int}() - if is_own_cell(mesh.tree, lower_cell_id) - push!(local_neighbor_ids, c2e[lower_cell_id]) - push!(local_neighbor_positions, 1) - end - if is_own_cell(mesh.tree, upper_cell_id) - push!(local_neighbor_ids, c2e[upper_cell_id]) - push!(local_neighbor_positions, 2) - end - if is_own_cell(mesh.tree, large_cell_id) - push!(local_neighbor_ids, c2e[large_cell_id]) - push!(local_neighbor_positions, 3) - end - - mpi_mortars.local_neighbor_ids[count] = local_neighbor_ids - mpi_mortars.local_neighbor_positions[count] = local_neighbor_positions - - # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side - # To prevent double counting, the mortars are always identified from the point of view of - # a large cell, if it is on this rank. In that case, direction points towards the small cells. - # If the large cell is not on this rank, the point of view of a small cell is taken instead, - # hence direction points towards the large cell in this case. - if iseven(direction) - mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? 1 : 2 - else - mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? 2 : 1 - end - - # Set orientation (1, 2 -> x; 3, 4 -> y) - if direction in (1, 2) - mpi_mortars.orientations[count] = 1 - else - mpi_mortars.orientations[count] = 2 - end end - end - return nothing + return nothing end - - - - end # @muladd diff --git a/src/solvers/dgsem_tree/containers_3d.jl b/src/solvers/dgsem_tree/containers_3d.jl index 2cf371e3612..0318946e34d 100644 --- a/src/solvers/dgsem_tree/containers_3d.jl +++ b/src/solvers/dgsem_tree/containers_3d.jl @@ -3,17 +3,17 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Container data structure (structure-of-arrays style) for DG elements -mutable struct ElementContainer3D{RealT<:Real, uEltype<:Real} <: AbstractContainer - inverse_jacobian::Vector{RealT} # [elements] - node_coordinates::Array{RealT, 5} # [orientation, i, j, k, elements] - surface_flux_values::Array{uEltype, 5} # [variables, i, j, direction, elements] - cell_ids::Vector{Int} # [elements] - # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _surface_flux_values::Vector{uEltype} +mutable struct ElementContainer3D{RealT <: Real, uEltype <: Real} <: AbstractContainer + inverse_jacobian::Vector{RealT} # [elements] + node_coordinates::Array{RealT, 5} # [orientation, i, j, k, elements] + surface_flux_values::Array{uEltype, 5} # [variables, i, j, direction, elements] + cell_ids::Vector{Int} # [elements] + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _surface_flux_values::Vector{uEltype} end nvariables(elements::ElementContainer3D) = size(elements.surface_flux_values, 1) @@ -26,51 +26,52 @@ Base.eltype(elements::ElementContainer3D) = eltype(elements.surface_flux_values) # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::ElementContainer3D, capacity) - n_nodes = nnodes(elements) - n_variables = nvariables(elements) - @unpack _node_coordinates, _surface_flux_values, - inverse_jacobian, cell_ids = elements + n_nodes = nnodes(elements) + n_variables = nvariables(elements) + @unpack _node_coordinates, _surface_flux_values, + inverse_jacobian, cell_ids = elements - resize!(inverse_jacobian, capacity) + resize!(inverse_jacobian, capacity) - resize!(_node_coordinates, 3 * n_nodes * n_nodes * n_nodes * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, n_nodes, capacity)) + resize!(_node_coordinates, 3 * n_nodes * n_nodes * n_nodes * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, n_nodes, capacity)) - resize!(_surface_flux_values, n_variables * n_nodes * n_nodes * 2 * 3 * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, n_nodes, 2 * 3, capacity)) + resize!(_surface_flux_values, n_variables * n_nodes * n_nodes * 2 * 3 * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, n_nodes, 2 * 3, + capacity)) - resize!(cell_ids, capacity) + resize!(cell_ids, capacity) - return nothing + return nothing end +function ElementContainer3D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function ElementContainer3D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - inverse_jacobian = fill(nan_RealT, capacity) + # Initialize fields with defaults + inverse_jacobian = fill(nan_RealT, capacity) - _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * n_nodes * capacity) + node_coordinates = 
unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, n_nodes, capacity)) - _surface_flux_values = fill(nan_uEltype, n_variables * n_nodes * n_nodes * 2 * 3 * capacity) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, n_nodes, 2 * 3, capacity)) + _surface_flux_values = fill(nan_uEltype, + n_variables * n_nodes * n_nodes * 2 * 3 * capacity) + surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, n_nodes, 2 * 3, capacity)) - cell_ids = fill(typemin(Int), capacity) + cell_ids = fill(typemin(Int), capacity) - - return ElementContainer3D{RealT, uEltype}( - inverse_jacobian, node_coordinates, surface_flux_values, cell_ids, - _node_coordinates, _surface_flux_values) + return ElementContainer3D{RealT, uEltype}(inverse_jacobian, node_coordinates, + surface_flux_values, cell_ids, + _node_coordinates, _surface_flux_values) end - # Return number of elements nelements(elements::ElementContainer3D) = length(elements.cell_ids) # TODO: Taal performance, 1:nelements(elements) vs. Base.OneTo(nelements(elements)) @@ -84,72 +85,76 @@ In particular, not the elements themselves are returned. @inline eachelement(elements::ElementContainer3D) = Base.OneTo(nelements(elements)) @inline Base.real(elements::ElementContainer3D) = eltype(elements.node_coordinates) - # Create element container and initialize element data function init_elements(cell_ids, mesh::TreeMesh3D, equations::AbstractEquations{3}, - basis, ::Type{RealT}, ::Type{uEltype}) where {RealT<:Real, uEltype<:Real} - # Initialize container - n_elements = length(cell_ids) - elements = ElementContainer3D{RealT, uEltype}( - n_elements, nvariables(equations), nnodes(basis)) - - init_elements!(elements, cell_ids, mesh, basis) - return elements + basis, ::Type{RealT}, + ::Type{uEltype}) where {RealT <: Real, uEltype <: Real} + # Initialize container + n_elements = length(cell_ids) + elements = ElementContainer3D{RealT, uEltype}(n_elements, nvariables(equations), + nnodes(basis)) + + init_elements!(elements, cell_ids, mesh, basis) + return elements end function init_elements!(elements, cell_ids, mesh::TreeMesh3D, basis) - nodes = get_nodes(basis) - # Compute the length of the 1D reference interval by integrating - # the function with constant value unity on the corresponding - # element data type (using \circ) - reference_length = integrate(one ∘ eltype, nodes, basis) - # Compute the offset of the midpoint of the 1D reference interval - # (its difference from zero) - reference_offset = first(nodes) + reference_length / 2 - - # Store cell ids - elements.cell_ids .= cell_ids - - # Calculate inverse Jacobian and node coordinates - for element in eachelement(elements) - # Get cell id - cell_id = cell_ids[element] - - # Get cell length - dx = length_at_cell(mesh.tree, cell_id) - - # Calculate inverse Jacobian - jacobian = dx / reference_length - elements.inverse_jacobian[element] = inv(jacobian) - - # Calculate node coordinates - # Note that the `tree_coordinates` are the midpoints of the cells. - # Hence, we need to add an offset for `nodes` with a midpoint - # different from zero. 
- for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - elements.node_coordinates[1, i, j, k, element] = ( - mesh.tree.coordinates[1, cell_id] + jacobian * (nodes[i] - reference_offset)) - elements.node_coordinates[2, i, j, k, element] = ( - mesh.tree.coordinates[2, cell_id] + jacobian * (nodes[j] - reference_offset)) - elements.node_coordinates[3, i, j, k, element] = ( - mesh.tree.coordinates[3, cell_id] + jacobian * (nodes[k] - reference_offset)) + nodes = get_nodes(basis) + # Compute the length of the 1D reference interval by integrating + # the function with constant value unity on the corresponding + # element data type (using \circ) + reference_length = integrate(one ∘ eltype, nodes, basis) + # Compute the offset of the midpoint of the 1D reference interval + # (its difference from zero) + reference_offset = (first(nodes) + last(nodes)) / 2 + + # Store cell ids + elements.cell_ids .= cell_ids + + # Calculate inverse Jacobian and node coordinates + for element in eachelement(elements) + # Get cell id + cell_id = cell_ids[element] + + # Get cell length + dx = length_at_cell(mesh.tree, cell_id) + + # Calculate inverse Jacobian + jacobian = dx / reference_length + elements.inverse_jacobian[element] = inv(jacobian) + + # Calculate node coordinates + # Note that the `tree_coordinates` are the midpoints of the cells. + # Hence, we need to add an offset for `nodes` with a midpoint + # different from zero. + for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + elements.node_coordinates[1, i, j, k, element] = (mesh.tree.coordinates[1, + cell_id] + + jacobian * (nodes[i] - + reference_offset)) + elements.node_coordinates[2, i, j, k, element] = (mesh.tree.coordinates[2, + cell_id] + + jacobian * (nodes[j] - + reference_offset)) + elements.node_coordinates[3, i, j, k, element] = (mesh.tree.coordinates[3, + cell_id] + + jacobian * (nodes[k] - + reference_offset)) + end end - end - return elements + return elements end - - # Container data structure (structure-of-arrays style) for DG interfaces -mutable struct InterfaceContainer3D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 5} # [leftright, variables, i, j, interfaces] - neighbor_ids::Matrix{Int} # [leftright, interfaces] - orientations::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct InterfaceContainer3D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 5} # [leftright, variables, i, j, interfaces] + neighbor_ids::Matrix{Int} # [leftright, interfaces] + orientations::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(interfaces::InterfaceContainer3D) = size(interfaces.u, 2) @@ -158,160 +163,155 @@ Base.eltype(interfaces::InterfaceContainer3D) = eltype(interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::InterfaceContainer3D, capacity) - n_nodes = nnodes(interfaces) - n_variables = nvariables(interfaces) - @unpack _u, _neighbor_ids, orientations = interfaces + n_nodes = nnodes(interfaces) + n_variables = nvariables(interfaces) + @unpack _u, _neighbor_ids, orientations = interfaces - resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function InterfaceContainer3D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function InterfaceContainer3D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) - - _neighbor_ids = fill(typemin(Int), 2 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - orientations = fill(typemin(Int), capacity) + _neighbor_ids = fill(typemin(Int), 2 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) + orientations = fill(typemin(Int), capacity) - return InterfaceContainer3D{uEltype}( - u, neighbor_ids, orientations, - _u, _neighbor_ids) + return InterfaceContainer3D{uEltype}(u, neighbor_ids, orientations, + _u, _neighbor_ids) end - # Return number of interfaces ninterfaces(interfaces::InterfaceContainer3D) = length(interfaces.orientations) - # Create interface container and initialize interface data in `elements`. 
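# Editor's note: every container in this file uses the same structure-of-arrays
# storage pattern seen above: a flat, `resize!`able `Vector` owns the memory and
# a multi-dimensional `Array` is wrapped around it for convenient indexing. A
# minimal, self-contained sketch of the idea (hypothetical names, not part of
# Trixi.jl):

mutable struct MiniContainer
    data::Matrix{Float64}   # multi-dimensional view used by the solver
    _data::Vector{Float64}  # internal `resize!`able storage
end

function MiniContainer(n::Integer, capacity::Integer)
    _data = fill(NaN, n * capacity)
    data = unsafe_wrap(Array, pointer(_data), (n, capacity))
    return MiniContainer(data, _data)
end

function Base.resize!(c::MiniContainer, capacity)
    n = size(c.data, 1)
    resize!(c._data, n * capacity)
    # `resize!` may reallocate and move the buffer, so the wrapper has to be
    # recreated around the (possibly new) pointer
    c.data = unsafe_wrap(Array, pointer(c._data), (n, capacity))
    return nothing
end

# The flat vector is required because multi-dimensional `Array`s cannot be
# `resize!`d in place; re-wrapping after each `resize!` keeps the view valid.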
function init_interfaces(cell_ids, mesh::TreeMesh3D, elements::ElementContainer3D) - # Initialize container - n_interfaces = count_required_interfaces(mesh, cell_ids) - interfaces = InterfaceContainer3D{eltype(elements)}( - n_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_interfaces!(interfaces, elements, mesh) - return interfaces + # Initialize container + n_interfaces = count_required_interfaces(mesh, cell_ids) + interfaces = InterfaceContainer3D{eltype(elements)}(n_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_interfaces!(interfaces, elements, mesh) + return interfaces end # Count the number of interfaces that need to be created function count_required_interfaces(mesh::TreeMesh3D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # Only count interfaces in positive direction to avoid double counting - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small or at boundary and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_id) - continue - end - - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # Only count interfaces in positive direction to avoid double counting + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_interfaces!(interfaces, elements, mesh::TreeMesh3D) - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # Only create interfaces in positive direction - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) - count += 1 - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] - interfaces.neighbor_ids[1, count] = element - - # Set orientation (x -> 1, y -> 2, z -> 3) - if direction in (1, 2) - interfaces.orientations[count] 
= 1 - elseif direction in (3, 4) - interfaces.orientations[count] = 2 - else - interfaces.orientations[count] = 3 - end + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # Only create interfaces in positive direction + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) + count += 1 + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + interfaces.neighbor_ids[1, count] = element + + # Set orientation (x -> 1, y -> 2, z -> 3) + if direction in (1, 2) + interfaces.orientations[count] = 1 + elseif direction in (3, 4) + interfaces.orientations[count] = 2 + else + interfaces.orientations[count] = 3 + end + end end - end - @assert count == ninterfaces(interfaces) ("Actual interface count ($count) does not match " * + @assert count==ninterfaces(interfaces) ("Actual interface count ($count) does not match "* "expectations $(ninterfaces(interfaces))") end - - # Container data structure (structure-of-arrays style) for DG boundaries -mutable struct BoundaryContainer3D{RealT<:Real, uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 5} # [leftright, variables, i, j, boundaries] - neighbor_ids::Vector{Int} # [boundaries] - orientations::Vector{Int} # [boundaries] - neighbor_sides::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] - n_boundaries_per_direction::SVector{6, Int} # [direction] - # internal `resize!`able storage - _u::Vector{uEltype} - _node_coordinates::Vector{RealT} +mutable struct BoundaryContainer3D{RealT <: Real, uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 5} # [leftright, variables, i, j, boundaries] + neighbor_ids::Vector{Int} # [boundaries] + orientations::Vector{Int} # [boundaries] + neighbor_sides::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] + n_boundaries_per_direction::SVector{6, Int} # [direction] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_coordinates::Vector{RealT} end nvariables(boundaries::BoundaryContainer3D) = size(boundaries.u, 2) @@ -320,181 +320,183 @@ Base.eltype(boundaries::BoundaryContainer3D) = eltype(boundaries.u) # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::BoundaryContainer3D, capacity) - n_nodes = nnodes(boundaries) - n_variables = nvariables(boundaries) - @unpack _u, _node_coordinates, - neighbor_ids, orientations, neighbor_sides = boundaries + n_nodes = nnodes(boundaries) + n_variables = nvariables(boundaries) + @unpack _u, _node_coordinates, + neighbor_ids, orientations, neighbor_sides = boundaries - resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_node_coordinates, 3 * n_nodes * n_nodes * capacity) - boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, capacity)) + resize!(_node_coordinates, 3 * n_nodes * n_nodes * capacity) + boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(neighbor_sides, capacity) + resize!(neighbor_sides, capacity) - return nothing + return nothing end +function BoundaryContainer3D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function BoundaryContainer3D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan_uEltype, 2 * n_variables * n_nodes * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) + # Initialize fields with defaults + _u = fill(nan_uEltype, 2 * n_variables * n_nodes * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - neighbor_ids = fill(typemin(Int), capacity) + neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - neighbor_sides = fill(typemin(Int), capacity) + neighbor_sides = fill(typemin(Int), capacity) - _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, capacity)) - n_boundaries_per_direction = SVector(0, 0, 0, 0, 0, 0) + n_boundaries_per_direction = SVector(0, 0, 0, 0, 0, 0) - return BoundaryContainer3D{RealT, uEltype}( - u, neighbor_ids, orientations, neighbor_sides, - node_coordinates, n_boundaries_per_direction, - _u, _node_coordinates) + return BoundaryContainer3D{RealT, uEltype}(u, neighbor_ids, orientations, + neighbor_sides, + node_coordinates, + n_boundaries_per_direction, + _u, _node_coordinates) end - # Return number of boundaries nboundaries(boundaries::BoundaryContainer3D) = length(boundaries.orientations) - # Create boundaries container and initialize boundary data in `elements`. 
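# Editor's note: a minimal sketch of how the `init_*` constructors in this file
# are chained, mirroring `create_cache` in `src/solvers/dgsem_tree/dg_1d.jl`
# further down in this diff (assumes `mesh::TreeMesh3D`, `equations`, and a DG
# `basis` are already set up):
#
#     leaf_cell_ids = local_leaf_cells(mesh.tree)
#     elements = init_elements(leaf_cell_ids, mesh, equations, basis, Float64, Float64)
#     interfaces = init_interfaces(leaf_cell_ids, mesh, elements)
#     boundaries = init_boundaries(leaf_cell_ids, mesh, elements)
#     mortars = init_mortars(leaf_cell_ids, mesh, elements, mortar)
#
# where `mortar` is a `LobattoLegendreMortarL2` as in the signature of
# `init_mortars` below.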
function init_boundaries(cell_ids, mesh::TreeMesh3D, elements::ElementContainer3D) - # Initialize container - n_boundaries = count_required_boundaries(mesh, cell_ids) - boundaries = BoundaryContainer3D{real(elements), eltype(elements)}( - n_boundaries, nvariables(elements), nnodes(elements)) - - # Connect elements with boundaries - init_boundaries!(boundaries, elements, mesh) - return boundaries + # Initialize container + n_boundaries = count_required_boundaries(mesh, cell_ids) + boundaries = BoundaryContainer3D{real(elements), eltype(elements)}(n_boundaries, + nvariables(elements), + nnodes(elements)) + + # Connect elements with boundaries + init_boundaries!(boundaries, elements, mesh) + return boundaries end # Count the number of boundaries that need to be created function count_required_boundaries(mesh::TreeMesh3D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # No neighbor exists in this direction -> must be a boundary - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # No neighbor exists in this direction -> must be a boundary + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and boundaries function init_boundaries!(boundaries, elements, mesh::TreeMesh3D) - # Reset boundaries count - count = 0 - - # Initialize boundary counts - counts_per_direction = MVector(0, 0, 0, 0, 0, 0) - - # OBS! Iterate over directions first, then over elements, and count boundaries in each direction - # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., - # obviating the need to store the boundary condition to be applied explicitly. 
- # Loop over directions - for direction in eachdirection(mesh.tree) - # Iterate over all elements to find missing neighbors and to connect to boundaries - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] - - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Create boundary - count += 1 - counts_per_direction[direction] += 1 - - # Set neighbor element id - boundaries.neighbor_ids[count] = element - - # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element - if iseven(direction) - boundaries.neighbor_sides[count] = 1 - else - boundaries.neighbor_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) - boundaries.orientations[count] = 1 - elseif direction in (3, 4) - boundaries.orientations[count] = 2 - else - boundaries.orientations[count] = 3 - end - - # Store node coordinates - enc = elements.node_coordinates - if direction == 1 # -x direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, 1, :, :, element] - elseif direction == 2 # +x direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, end, :, :, element] - elseif direction == 3 # -y direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, 1, :, element] - elseif direction == 4 # +y direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, end, :, element] - elseif direction == 5 # -z direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, 1, element] - elseif direction == 6 # +z direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, end, element] - else - error("should not happen") - end + # Reset boundaries count + count = 0 + + # Initialize boundary counts + counts_per_direction = MVector(0, 0, 0, 0, 0, 0) + + # OBS! Iterate over directions first, then over elements, and count boundaries in each direction + # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., + # obviating the need to store the boundary condition to be applied explicitly. 
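+ # Editor's note (illustration only, not part of this file): thanks to this
+ # sorting, the range of boundary indices belonging to one direction can be
+ # recovered from `n_boundaries_per_direction` by prefix sums, e.g. via a
+ # hypothetical helper
+ #
+ #     function boundary_index_range(counts, direction)
+ #         stop = sum(counts[1:direction])
+ #         return (stop - counts[direction] + 1):stop
+ #     end
+ #
+ # so that counts = (2, 1, 0, 3, 0, 0) yields
+ # boundary_index_range(counts, 4) == 4:6 for the +y direction.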
+ # Loop over directions
+ for direction in eachdirection(mesh.tree)
+ # Iterate over all elements to find missing neighbors and to connect to boundaries
+ for element in eachelement(elements)
+ # Get cell id
+ cell_id = elements.cell_ids[element]
+
+ # If neighbor exists, current cell is not at a boundary
+ if has_neighbor(mesh.tree, cell_id, direction)
+ continue
+ end
+
+ # If coarse neighbor exists, current cell is not at a boundary
+ if has_coarse_neighbor(mesh.tree, cell_id, direction)
+ continue
+ end
+
+ # Create boundary
+ count += 1
+ counts_per_direction[direction] += 1
+
+ # Set neighbor element id
+ boundaries.neighbor_ids[count] = element
+
+ # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element
+ if iseven(direction)
+ boundaries.neighbor_sides[count] = 1
+ else
+ boundaries.neighbor_sides[count] = 2
+ end
+
+ # Set orientation (x -> 1, y -> 2, z -> 3)
+ if direction in (1, 2)
+ boundaries.orientations[count] = 1
+ elseif direction in (3, 4)
+ boundaries.orientations[count] = 2
+ else
+ boundaries.orientations[count] = 3
+ end
+
+ # Store node coordinates
+ enc = elements.node_coordinates
+ if direction == 1 # -x direction
+ boundaries.node_coordinates[:, :, :, count] .= enc[:, 1, :, :, element]
+ elseif direction == 2 # +x direction
+ boundaries.node_coordinates[:, :, :, count] .= enc[:, end, :, :,
+ element]
+ elseif direction == 3 # -y direction
+ boundaries.node_coordinates[:, :, :, count] .= enc[:, :, 1, :, element]
+ elseif direction == 4 # +y direction
+ boundaries.node_coordinates[:, :, :, count] .= enc[:, :, end, :,
+ element]
+ elseif direction == 5 # -z direction
+ boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, 1, element]
+ elseif direction == 6 # +z direction
+ boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, end,
+ element]
+ else
+ error("should not happen")
+ end
+ end
+ end
end
- @assert count == nboundaries(boundaries) ("Actual boundaries count ($count) does not match " *
+ @assert count==nboundaries(boundaries) ("Actual boundaries count ($count) does not match "*
"expectations $(nboundaries(boundaries))")
- @assert sum(counts_per_direction) == count
+ @assert sum(counts_per_direction) == count
- boundaries.n_boundaries_per_direction = SVector(counts_per_direction)
+ boundaries.n_boundaries_per_direction = SVector(counts_per_direction)
- return SVector(counts_per_direction)
+ return SVector(counts_per_direction)
end
-
-
# Container data structure (structure-of-arrays style) for DG L2 mortars
# Positions/directions for orientations = 1, large_sides = 2:
# mortar is orthogonal to x-axis, large side is in positive coordinate direction wrt mortar
@@ -517,21 +519,21 @@ end
#
# Left and right are used *both* for the numbering of the mortar faces *and* for the position of the
# elements with respect to the axis orthogonal to the mortar.
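# Editor's note: the `large_sides` and `orientations` values assigned in
# `init_mortars!` further below follow a fixed pattern in the direction index.
# A compact restatement as a hypothetical helper (for illustration only, not
# part of Trixi.jl):

# Given the direction (1..6) pointing from the large element towards the small
# elements, return the large side (1 -> negative/"left", 2 -> positive/"right")
# and the orientation (1 -> x, 2 -> y, 3 -> z) of the resulting mortar.
mortar_large_side_and_orientation(direction) = (iseven(direction) ? 1 : 2,
                                                cld(direction, 2))

# Example: direction 5 (small elements in -z direction) gives
# mortar_large_side_and_orientation(5) == (2, 3)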
-mutable struct L2MortarContainer3D{uEltype<:Real} <: AbstractContainer - u_upper_left ::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - u_upper_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - u_lower_left ::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - u_lower_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - neighbor_ids ::Array{Int, 2} # [position, mortars] - # Large sides: left -> 1, right -> 2 - large_sides ::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] - # internal `resize!`able storage - _u_upper_left ::Vector{uEltype} - _u_upper_right::Vector{uEltype} - _u_lower_left ::Vector{uEltype} - _u_lower_right::Vector{uEltype} - _neighbor_ids ::Vector{Int} +mutable struct L2MortarContainer3D{uEltype <: Real} <: AbstractContainer + u_upper_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_upper_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_lower_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_lower_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + neighbor_ids::Array{Int, 2} # [position, mortars] + # Large sides: left -> 1, right -> 2 + large_sides::Vector{Int} # [mortars] + orientations::Vector{Int} # [mortars] + # internal `resize!`able storage + _u_upper_left::Vector{uEltype} + _u_upper_right::Vector{uEltype} + _u_lower_left::Vector{uEltype} + _u_lower_right::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(mortars::L2MortarContainer3D) = size(mortars.u_upper_left, 2) @@ -540,256 +542,274 @@ Base.eltype(mortars::L2MortarContainer3D) = eltype(mortars.u_upper_left) # See explanation of Base.resize! for the element container function Base.resize!(mortars::L2MortarContainer3D, capacity) - n_nodes = nnodes(mortars) - n_variables = nvariables(mortars) - @unpack _u_upper_left, _u_upper_right, _u_lower_left, _u_lower_right, - _neighbor_ids, large_sides, orientations = mortars + n_nodes = nnodes(mortars) + n_variables = nvariables(mortars) + @unpack _u_upper_left, _u_upper_right, _u_lower_left, _u_lower_right, + _neighbor_ids, large_sides, orientations = mortars - resize!(_u_upper_left, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_upper_left, 2 * n_variables * n_nodes * n_nodes * capacity) + mortars.u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_u_upper_right, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_upper_right, 2 * n_variables * n_nodes * n_nodes * capacity) + mortars.u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_u_lower_left, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_lower_left, 2 * n_variables * n_nodes * n_nodes * capacity) + mortars.u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_u_lower_right, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_lower_right, 2 * n_variables * n_nodes * n_nodes * 
capacity) + mortars.u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_neighbor_ids, 5 * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (5, capacity)) + resize!(_neighbor_ids, 5 * capacity) + mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (5, capacity)) - resize!(large_sides, capacity) + resize!(large_sides, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function L2MortarContainer3D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function L2MortarContainer3D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u_upper_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + # Initialize fields with defaults + _u_upper_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - _u_upper_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + _u_upper_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - _u_lower_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + _u_lower_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - _u_lower_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + _u_lower_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - _neighbor_ids = fill(typemin(Int), 5 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (5, capacity)) + _neighbor_ids = fill(typemin(Int), 5 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (5, capacity)) - large_sides = fill(typemin(Int), capacity) + large_sides = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - return L2MortarContainer3D{uEltype}( - u_upper_left, u_upper_right, - u_lower_left, u_lower_right, - neighbor_ids, large_sides, orientations, - _u_upper_left, _u_upper_right, - _u_lower_left, _u_lower_right, - _neighbor_ids) + return L2MortarContainer3D{uEltype}(u_upper_left, u_upper_right, + u_lower_left, u_lower_right, + neighbor_ids, large_sides, orientations, + _u_upper_left, _u_upper_right, + _u_lower_left, _u_lower_right, + _neighbor_ids) end - # Return number of L2 mortars nmortars(l2mortars::L2MortarContainer3D) = length(l2mortars.orientations) - # Allow printing container contents function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer3D) - @nospecialize c # reduce precompilation time - - println(io, '*'^20) - 
for idx in CartesianIndices(c.u_upper_left) - println(io, "c.u_upper_left[$idx] = $(c.u_upper_left[idx])") - end - for idx in CartesianIndices(c.u_upper_right) - println(io, "c.u_upper_right[$idx] = $(c.u_upper_right[idx])") - end - for idx in CartesianIndices(c.u_lower_left) - println(io, "c.u_lower_left[$idx] = $(c.u_lower_left[idx])") - end - for idx in CartesianIndices(c.u_lower_right) - println(io, "c.u_lower_right[$idx] = $(c.u_lower_right[idx])") - end - println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") - println(io, "c.large_sides = $(c.large_sides)") - println(io, "c.orientations = $(c.orientations)") - print(io, '*'^20) -end + @nospecialize c # reduce precompilation time + println(io, '*'^20) + for idx in CartesianIndices(c.u_upper_left) + println(io, "c.u_upper_left[$idx] = $(c.u_upper_left[idx])") + end + for idx in CartesianIndices(c.u_upper_right) + println(io, "c.u_upper_right[$idx] = $(c.u_upper_right[idx])") + end + for idx in CartesianIndices(c.u_lower_left) + println(io, "c.u_lower_left[$idx] = $(c.u_lower_left[idx])") + end + for idx in CartesianIndices(c.u_lower_right) + println(io, "c.u_lower_right[$idx] = $(c.u_lower_right[idx])") + end + println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") + println(io, "c.large_sides = $(c.large_sides)") + println(io, "c.orientations = $(c.orientations)") + print(io, '*'^20) +end # Create mortar container and initialize mortar data in `elements`. function init_mortars(cell_ids, mesh::TreeMesh3D, elements::ElementContainer3D, mortar::LobattoLegendreMortarL2) - # Initialize containers - n_mortars = count_required_mortars(mesh, cell_ids) - mortars = L2MortarContainer3D{eltype(elements)}( - n_mortars, nvariables(elements), nnodes(elements)) - - # Connect elements with mortars - init_mortars!(mortars, elements, mesh) - return mortars + # Initialize containers + n_mortars = count_required_mortars(mesh, cell_ids) + mortars = L2MortarContainer3D{eltype(elements)}(n_mortars, nvariables(elements), + nnodes(elements)) + + # Connect elements with mortars + init_mortars!(mortars, elements, mesh) + return mortars end # Count the number of mortars that need to be created function count_required_mortars(mesh::TreeMesh3D, cell_ids) - count = 0 - - # Iterate over all cells and count mortars from perspective of coarse cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_id) - continue - end - - count +=1 + count = 0 + + # Iterate over all cells and count mortars from perspective of coarse cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and mortars function init_mortars!(mortars, elements, mesh::TreeMesh3D) - # Construct cell -> element mapping for easier algorithm 
implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Create mortar between elements (3 possible orientations): - # - # mortar in x-direction: - # 1 -> small element in lower, left position (-y, -z) - # 2 -> small element in lower, right position (+y, -z) - # 3 -> small element in upper, left position (-y, +z) - # 4 -> small element in upper, right position (+y, +z) - # - # mortar in y-direction: - # 1 -> small element in lower, left position (-x, -z) - # 2 -> small element in lower, right position (+x, -z) - # 3 -> small element in upper, left position (-x, +z) - # 4 -> small element in upper, right position (+x, +z) - # - # mortar in z-direction: - # 1 -> small element in lower, left position (-x, -y) - # 2 -> small element in lower, right position (+x, -y) - # 3 -> small element in upper, left position (-x, +y) - # 4 -> small element in upper, right position (+x, +y) - # - # Always the case: - # 5 -> large element - # - count += 1 - mortars.neighbor_ids[5, count] = element - - # Directions are from the perspective of the large element - # ("Where are the small elements? Ah, in the ... 
direction!") - if direction == 1 # -x - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[6, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, neighbor_cell_id]] - elseif direction == 2 # +x - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[7, neighbor_cell_id]] - elseif direction == 3 # -y - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, neighbor_cell_id]] - elseif direction == 4 # +y - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[6, neighbor_cell_id]] - elseif direction == 5 # -z - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[5, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[6, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, neighbor_cell_id]] - elseif direction == 6 # +z - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - else - error("should not happen") - end - - # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side - if iseven(direction) - mortars.large_sides[count] = 1 - else - mortars.large_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2, z -> 3) - if direction in (1, 2) - mortars.orientations[count] = 1 - elseif direction in (3, 4) - mortars.orientations[count] = 2 - else - mortars.orientations[count] = 3 - end + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Create mortar between elements (3 possible orientations): + # + # mortar in x-direction: + # 1 -> small element in lower, left position (-y, -z) + # 2 -> small element in lower, right position (+y, -z) + # 3 -> small element in upper, left position (-y, +z) + # 4 -> small element in upper, right position (+y, +z) + # + # mortar in y-direction: + # 1 -> small element in lower, left position 
(-x, -z) + # 2 -> small element in lower, right position (+x, -z) + # 3 -> small element in upper, left position (-x, +z) + # 4 -> small element in upper, right position (+x, +z) + # + # mortar in z-direction: + # 1 -> small element in lower, left position (-x, -y) + # 2 -> small element in lower, right position (+x, -y) + # 3 -> small element in upper, left position (-x, +y) + # 4 -> small element in upper, right position (+x, +y) + # + # Always the case: + # 5 -> large element + # + count += 1 + mortars.neighbor_ids[5, count] = element + + # Directions are from the perspective of the large element + # ("Where are the small elements? Ah, in the ... direction!") + if direction == 1 # -x + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[6, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, + neighbor_cell_id]] + elseif direction == 2 # +x + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[7, + neighbor_cell_id]] + elseif direction == 3 # -y + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, + neighbor_cell_id]] + elseif direction == 4 # +y + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[6, + neighbor_cell_id]] + elseif direction == 5 # -z + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[5, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[6, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, + neighbor_cell_id]] + elseif direction == 6 # +z + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + else + error("should not happen") + end + + # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side + if iseven(direction) + mortars.large_sides[count] = 1 + else + mortars.large_sides[count] = 2 + end + + # Set orientation (x -> 1, y -> 2, z -> 3) + if direction in (1, 2) + mortars.orientations[count] = 1 + elseif direction in (3, 4) + mortars.orientations[count] = 2 + else + mortars.orientations[count] = 3 + end + end end - end - @assert count == nmortars(mortars) ("Actual mortar count ($count) does not match " * + @assert count==nmortars(mortars) ("Actual mortar count ($count) does not match "* "expectations $(nmortars(mortars))") end - - end # @muladd diff --git 
a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index 074745f66ca..cb28dad968c 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -3,48 +3,45 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # du .= zero(eltype(du)) doesn't scale when using multiple threads. # See https://github.com/trixi-framework/Trixi.jl/pull/924 for a performance comparison. function reset_du!(du, dg, cache) - @threaded for element in eachelement(dg, cache) - du[.., element] .= zero(eltype(du)) - end + @threaded for element in eachelement(dg, cache) + du[.., element] .= zero(eltype(du)) + end - return du + return du end - # pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) # # Given blending factors `alpha` and the solver `dg`, fill # `element_ids_dg` with the IDs of elements using a pure DG scheme and # `element_ids_dgfv` with the IDs of elements using a blended DG-FV scheme. -function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg::DG, cache) - empty!(element_ids_dg) - empty!(element_ids_dgfv) - - for element in eachelement(dg, cache) - # Clip blending factor for values close to zero (-> pure DG) - dg_only = isapprox(alpha[element], 0, atol=1e-12) - if dg_only - push!(element_ids_dg, element) - else - push!(element_ids_dgfv, element) +function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg::DG, + cache) + empty!(element_ids_dg) + empty!(element_ids_dgfv) + + for element in eachelement(dg, cache) + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha[element], 0, atol = 1e-12) + if dg_only + push!(element_ids_dg, element) + else + push!(element_ids_dgfv, element) + end end - end - return nothing + return nothing end - function volume_jacobian(element, mesh::TreeMesh, cache) - return inv(cache.elements.inverse_jacobian[element])^ndims(mesh) + return inv(cache.elements.inverse_jacobian[element])^ndims(mesh) end - - # Indicators used for shock-capturing and AMR include("indicators.jl") include("indicators_1d.jl") @@ -74,6 +71,4 @@ include("dg_3d_parabolic.jl") # as well as specialized implementations used to improve performance include("dg_2d_compressible_euler.jl") include("dg_3d_compressible_euler.jl") - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index a3346a4f15c..c66f427cce3 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # everything related to a DG semidiscretization in 1D, # currently limited to Lobatto-Legendre nodes @@ -13,216 +13,235 @@ # the RHS etc. function create_cache(mesh::TreeMesh{1}, equations, dg::DG, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - cache = (; elements, interfaces, boundaries) + cache = (; elements, interfaces, boundaries) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - return cache + return cache end - # The methods below are specialized on the volume integral type # and called from the basic `create_cache` method at the top. -function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, equations, +function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, + equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype) - NamedTuple() + NamedTuple() end - -function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, equations, +function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, + equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] + element_ids_dg = Int[] + element_ids_dgfv = Int[] - cache = create_cache(mesh, equations, - VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), - dg, uEltype) + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype) - A2dp1_x = Array{uEltype, 2} - fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] + A2dp1_x = Array{uEltype, 2} + fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] - return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, fstar1_R_threaded) + return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, + fstar1_R_threaded) end - -function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, uEltype) - - A2dp1_x = Array{uEltype, 2} - fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - - return (; fstar1_L_threaded, fstar1_R_threaded) +function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, + equations, + volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, + uEltype) + A2dp1_x = Array{uEltype, 2} + fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), 
nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + + return (; fstar1_L_threaded, fstar1_R_threaded) end - # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? function rhs!(du, u, t, mesh::TreeMesh{1}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, - equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end function calc_volume_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + weak_form_kernel!(du, u, element, mesh, + 
nonconservative_terms, equations, + dg, cache) + end - @threaded for element in eachelement(dg, cache) - weak_form_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - dg, cache) - end - - return nothing + return nothing end @inline function weak_form_kernel!(du, u, element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_dhat = dg.basis + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_dhat = dg.basis - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) - flux1 = flux(u_node, 1, equations) - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, equations, dg, ii, element) + flux1 = flux(u_node, 1, equations) + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, + equations, dg, ii, element) + end end - end - return nothing + return nothing end - function calc_volume_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end + @threaded for element in eachelement(dg, cache) + flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, + equations, + volume_integral.volume_flux, dg, cache) + end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + element, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - - # Calculate volume integral in one element - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. - - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, element) - flux1 = volume_flux(u_node, u_node_ii, 1, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, equations, dg, i, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, equations, dg, ii, element) + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + + # Calculate volume integral in one element + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + + # All diagonal entries of `derivative_split` are zero. 
Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. + + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, element) + flux1 = volume_flux(u_node, u_node_ii, 1, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, + equations, dg, i, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, + equations, dg, ii, element) + end end - end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + element, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, element) - noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * noncons_flux1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + symmetric_flux, nonconservative_flux = volume_flux - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, element) - end -end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. 
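The comments above compress the key trick of the flux-differencing kernels: `derivative_split` has a zero diagonal, and for a symmetric `volume_flux` with `f(a, b) == f(b, a)` the loop only needs the strict upper triangle of node pairs, reusing each two-point flux for both nodes. A minimal standalone sketch of that loop structure, with `D_split` and `central_flux` as hypothetical stand-ins (illustrative only, not Trixi.jl code):

```julia
n = 4
# toy skew-symmetric operator with zero diagonal, standing in for `derivative_split`
D_split = [i == j ? 0.0 : 1.0 / (j - i) for i in 1:n, j in 1:n]
u = [1.0, 2.0, 3.0, 4.0]
du = zeros(n)

# symmetric two-point flux: central_flux(a, b) == central_flux(b, a)
central_flux(a, b) = 0.25 * (a^2 + b^2)

for i in 1:n, ii in (i + 1):n    # strict upper triangle: each pair visited once
    f = central_flux(u[i], u[ii])
    du[i] += D_split[i, ii] * f  # contribution to node i
    du[ii] += D_split[ii, i] * f # the same flux is reused for node ii
end
```

The nonconservative flux below is not symmetric in its arguments, so that shortcut does not apply: the kernel loops over all `ii`, and the factor `0.5` cancels the factor 2 of the flux-differencing form, as noted at the end of the function.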
+ + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, element) + noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * noncons_flux1 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, element) + end +end # TODO: Taal dimension agnostic function calc_volume_integral!(du, u, @@ -230,37 +249,40 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache - @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end + @unpack element_ids_dg, element_ids_dgfv = cache + @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral + + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) + + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) + + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, + equations, + volume_flux_dg, dg, cache) + end - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] + # Loop over blended DG-FV elements + @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, + equations, + volume_flux_dg, dg, cache, 1 - alpha_element) - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) - end + # Calculate FV volume integral contribution + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end - return nothing + return nothing end # TODO: Taal dimension agnostic @@ -269,369 +291,367 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache) - @unpack volume_flux_fv = volume_integral + 
@unpack volume_flux_fv = volume_integral - # Calculate LGL FV volume integral - @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) - end + # Calculate LGL FV volume integral + @threaded for element in eachelement(dg, cache) + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, true) + end - return nothing + return nothing end - @inline function fv_kernel!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms, equations, - volume_flux_fv, dg::DGSEM, cache, element, alpha=true) - @unpack fstar1_L_threaded, fstar1_R_threaded = cache - @unpack inverse_weights = dg.basis - - # Calculate FV two-point fluxes - fstar1_L = fstar1_L_threaded[Threads.threadid()] - fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, element, cache) - - # Calculate FV volume integral contribution - for i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, element] += ( alpha * - (inverse_weights[i] * (fstar1_L[v, i+1] - fstar1_R[v, i])) ) + volume_flux_fv, dg::DGSEM, cache, element, alpha = true) + @unpack fstar1_L_threaded, fstar1_R_threaded = cache + @unpack inverse_weights = dg.basis + # Calculate FV two-point fluxes + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + calcflux_fv!(fstar1_L, fstar1_R, u, mesh, nonconservative_terms, equations, + volume_flux_fv, + dg, element, cache) + + # Calculate FV volume integral contribution + for i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, element] += (alpha * + (inverse_weights[i] * + (fstar1_L[v, i + 1] - fstar1_R[v, i]))) + end end - end - return nothing + return nothing end - -@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any,3}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any, 3}, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) + fstar1_L[:, 1] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1] .= zero(eltype(fstar1_R)) + + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, element) + u_rr = get_node_vars(u, equations, dg, i, element) + flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction + set_node_vars!(fstar1_L, flux, equations, dg, i) + set_node_vars!(fstar1_R, flux, equations, dg, i) + end - fstar1_L[:, 1 ] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1 ] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1] .= zero(eltype(fstar1_R)) - - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, element) - u_rr = get_node_vars(u, equations, dg, i , element) - flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction - set_node_vars!(fstar1_L, flux, equations, dg, i) - set_node_vars!(fstar1_R, flux, equations, dg, i) - end - - return nothing + return nothing end - -@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any,3}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any, 3}, mesh::TreeMesh{1}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - volume_flux, nonconservative_flux = volume_flux_fv - - 
fstar1_L[:, 1 ] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1 ] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1] .= zero(eltype(fstar1_R)) - - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, element) - u_rr = get_node_vars(u, equations, dg, i , element) - - # Compute conservative part - f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - f1_L = f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) - f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) - - # Copy to temporary storage - set_node_vars!(fstar1_L, f1_L, equations, dg, i) - set_node_vars!(fstar1_R, f1_R, equations, dg, i) - end + volume_flux, nonconservative_flux = volume_flux_fv + + fstar1_L[:, 1] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1] .= zero(eltype(fstar1_R)) + + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, element) + u_rr = get_node_vars(u, equations, dg, i, element) + + # Compute conservative part + f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + f1_L = f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) + f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) + + # Copy to temporary storage + set_node_vars!(fstar1_L, f1_L, equations, dg, i) + set_node_vars!(fstar1_R, f1_R, equations, dg, i) + end - return nothing + return nothing end - # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @unpack interfaces = cache + @unpack interfaces = cache - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] - # interface in x-direction - for v in eachvariable(equations) - interfaces.u[1, v, interface] = u[v, nnodes(dg), left_element] - interfaces.u[2, v, interface] = u[v, 1, right_element] + # interface in x-direction + for v in eachvariable(equations) + interfaces.u[1, v, interface] = u[v, nnodes(dg), left_element] + interfaces.u[2, v, interface] = u[v, 1, right_element] + end end - end - return nothing + return nothing end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{1}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Call pointwise Riemann solver 
- u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, left_direction, left_id] = flux[v] - surface_flux_values[v, right_direction, right_id] = flux[v] + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, left_direction, left_id] = flux[v] + surface_flux_values[v, right_direction, right_id] = flux[v] + end end - end end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{1}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Call pointwise Riemann solver - orientation = orientations[interface] - u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Compute both nonconservative fluxes - noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) - noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, left_direction, left_id] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, right_direction, right_id] = flux[v] + 0.5 * noncons_right[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Call pointwise Riemann solver + orientation = orientations[interface] + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) + + # Compute both nonconservative fluxes + noncons_left = nonconservative_flux(u_ll, u_rr, 
orientation, equations) + noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, left_direction, left_id] = flux[v] + + 0.5 * noncons_left[v] + surface_flux_values[v, right_direction, right_id] = flux[v] + + 0.5 * noncons_right[v] + end end - end - return nothing + return nothing end - function prolong2boundaries!(cache, u, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack neighbor_sides = boundaries - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.neighbor_ids[boundary] - - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for v in eachvariable(equations) - boundaries.u[1, v, boundary] = u[v, nnodes(dg), element] - end - else # Element in +x direction of boundary - for v in eachvariable(equations) - boundaries.u[2, v, boundary] = u[v, 1, element] - end + @unpack boundaries = cache + @unpack neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.neighbor_ids[boundary] + + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for v in eachvariable(equations) + boundaries.u[1, v, boundary] = u[v, nnodes(dg), element] + end + else # Element in +x direction of boundary + for v in eachvariable(equations) + boundaries.u[2, v, boundary] = u[v, 1, element] + end + end end - end - return nothing + return nothing end # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert isempty(eachboundary(dg, cache)) end - function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], - have_nonconservative_terms(equations), equations, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], - have_nonconservative_terms(equations), equations, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], + have_nonconservative_terms(equations), equations, + surface_integral, dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], + have_nonconservative_terms(equations), equations, + surface_integral, dg, cache, + 2, firsts[2], lasts[2]) end - -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,3}, 
t, - boundary_condition, nonconservative_terms::False, equations, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 3}, + t, + boundary_condition, + nonconservative_terms::False, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, direction, neighbor] = flux[v] + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, direction, neighbor] = flux[v] + end end - end - return nothing + return nothing end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,3}, t, - boundary_condition, nonconservative_terms::True, equations, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 3}, + t, + boundary_condition, + nonconservative_terms::True, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - noncons_flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, nonconservative_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, direction, neighbor] = flux[v] + 0.5 * noncons_flux[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, neighbor_sides, 
node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + noncons_flux = boundary_condition(u_inner, orientations[boundary], direction, x, + t, nonconservative_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, direction, neighbor] = flux[v] + + 0.5 * noncons_flux[v] + end end - end - return nothing + return nothing end function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, surface_integral, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for v in eachvariable(equations) - # surface at -x - du[v, 1, element] = ( - du[v, 1, element] - surface_flux_values[v, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), element] = ( - du[v, nnodes(dg), element] + surface_flux_values[v, 2, element] * factor_2) + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
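The remark above about `+=` is worth unpacking: each face of a 1D element contributes its numerical flux to exactly one volume node, scaled by a `boundary_interpolation` factor, and the updates are written as plain assignments so the right-hand sides stay in `a * b + c` form that `@muladd` can fuse. A self-contained sketch with illustrative values (not actual LGL factors):

```julia
du = zeros(3)                                  # rhs values at the nodes of one element
surface_flux_left, surface_flux_right = -0.7, 0.3
factor_1, factor_2 = 3.0, 3.0                  # toy stand-ins for `boundary_interpolation` entries

# explicit assignments instead of `+=`, mirroring the code below
du[1] = du[1] - surface_flux_left * factor_1   # face at -x enters with a minus sign
du[3] = du[3] + surface_flux_right * factor_2  # face at +x enters with a plus sign
```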
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for v in eachvariable(equations) + # surface at -x + du[v, 1, element] = (du[v, 1, element] - + surface_flux_values[v, 1, element] * factor_1) + + # surface at +x + du[v, nnodes(dg), element] = (du[v, nnodes(dg), element] + + surface_flux_values[v, 2, element] * factor_2) + end end - end - return nothing + return nothing end - function apply_jacobian!(du, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] - - for i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, element] *= factor - end + @threaded for element in eachelement(dg, cache) + factor = -cache.elements.inverse_jacobian[element] + + for i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, element] *= factor + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquations{1}, dg::DG, cache) - return nothing + return nothing end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{1}, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, element) - du_local = source_terms(u_local, x_local, t, equations) - add_to_node_vars!(du, du_local, equations, dg, i, element) + @threaded for element in eachelement(dg, cache) + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + element) + du_local = source_terms(u_local, x_local, t, equations) + add_to_node_vars!(du, du_local, equations, dg, i, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index be4235c627b..c2aa75388c8 100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This file collects all methods that have been updated to work with parabolic systems of equations # @@ -12,71 +13,90 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). 
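The multi-step strategy sketched in the comment above can be seen end to end on a toy problem. The following self-contained snippet mimics it for the 1D heat equation on a periodic grid, with central differences standing in for the DG operators; `nu`, `grad`, and the sign conventions are illustrative only:

```julia
nu = 0.1                          # diffusion coefficient (illustrative)
n = 8
dx = 1.0 / n
u = sin.(2π .* (0:(n - 1)) .* dx)

# periodic central difference, a stand-in for the DG gradient operator
grad(v, dx) = (circshift(v, -1) .- circshift(v, 1)) ./ (2 * dx)

gradients = grad(u, dx)           # compute grad(u)
flux_viscous = nu .* gradients    # evaluate f(u, grad(u))
du = grad(flux_viscous, dx)       # divergence of the viscous flux, du ≈ nu * u_xx
```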
-function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, +function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) - @unpack u_transformed, gradients, flux_viscous = cache_parabolic - - # Convert conservative variables to a form more suitable for viscous flux calculations - @trixi_timeit timer() "transform variables" transform_variables!( - u_transformed, u, mesh, equations_parabolic, dg, parabolic_scheme, cache, cache_parabolic) - - # Compute the gradients of the transformed variables - @trixi_timeit timer() "calculate gradient" calc_gradient!( - gradients, u_transformed, t, mesh, equations_parabolic, boundary_conditions_parabolic, dg, - cache, cache_parabolic) - - # Compute and store the viscous fluxes - @trixi_timeit timer() "calculate viscous fluxes" calc_viscous_fluxes!( - flux_viscous, gradients, u_transformed, mesh, equations_parabolic, dg, cache, cache_parabolic) - - # The remainder of this function is essentially a regular rhs! for parabolic equations (i.e., it - # computes the divergence of the viscous fluxes) - # - # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have - # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the - # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the - # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it - # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* - # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we - # do not need to recreate the existing data structure only with a different name, and c) we do not - # need to interpolate solutions *and* gradients to the surfaces. 
- - # TODO: parabolic; reconsider current data structure reuse strategy - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, flux_viscous, mesh, equations_parabolic, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache_parabolic.elements.surface_flux_values, mesh, equations_parabolic, dg, cache_parabolic) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_divergence!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations_parabolic, dg, cache_parabolic) - - return nothing + @unpack u_transformed, gradients, flux_viscous = cache_parabolic + + # Convert conservative variables to a form more suitable for viscous flux calculations + @trixi_timeit timer() "transform variables" begin + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, parabolic_scheme, cache, cache_parabolic) + end + + # Compute the gradients of the transformed variables + @trixi_timeit timer() "calculate gradient" begin + calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, + boundary_conditions_parabolic, dg, cache, cache_parabolic) + end + + # Compute and store the viscous fluxes + @trixi_timeit timer() "calculate viscous fluxes" begin + calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh, + equations_parabolic, dg, cache, cache_parabolic) + end + + # The remainder of this function is essentially a regular rhs! for + # parabolic equations (i.e., it computes the divergence of the viscous fluxes) + # + # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have + # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the + # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the + # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it + # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* + # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we + # do not need to recreate the existing data structure only with a different name, and c) we do not + # need to interpolate solutions *and* gradients to the surfaces. 
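The container reuse described in this OBS comment boils down to one two-sided buffer serving double duty: the hyperbolic pass fills it with solution traces, while the parabolic pass overwrites it with viscous flux traces. A tiny illustrative sketch with hypothetical shapes and names:

```julia
interfaces_u = zeros(2, 3, 5)     # (side, variable, interface), allocated once

function fill_interfaces!(buf, left_trace, right_trace)
    buf[1, :, :] .= left_trace
    buf[2, :, :] .= right_trace
    return buf
end

u_traces = rand(3, 5)
fill_interfaces!(interfaces_u, u_traces, u_traces)        # hyperbolic pass: stores u

flux_traces = rand(3, 5)
fill_interfaces!(interfaces_u, flux_traces, flux_traces)  # parabolic pass: same storage, stores fluxes
```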
+ + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing end # Transform solution variables prior to taking the gradient @@ -85,418 +105,463 @@ end function transform_variables!(u_transformed, u, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for i in eachnode(dg) - u_node = get_node_vars(u, equations_parabolic, dg, i, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, equations_parabolic) - set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, element) + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for i in eachnode(dg) + u_node = get_node_vars(u, equations_parabolic, dg, i, element) + u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, + equations_parabolic) + set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, + i, element) + end end - end end # This is the version used when calculating the divergence of the viscous fluxes function calc_volume_integral!(du, flux_viscous, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, dg::DGSEM, cache) - @unpack derivative_dhat = dg.basis - - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for i in eachnode(dg) - flux_1_node = get_node_vars(flux_viscous, equations_parabolic, dg, i, element) + @unpack derivative_dhat = dg.basis - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, equations_parabolic, dg, ii, element) - end + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for i in eachnode(dg) + flux_1_node = get_node_vars(flux_viscous, equations_parabolic, dg, i, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, 
derivative_dhat[ii, i], flux_1_node, + equations_parabolic, dg, ii, element) + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache_parabolic, flux_viscous, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack interfaces = cache_parabolic + @unpack interfaces = cache_parabolic - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] - # interface in x-direction - for v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, interface] = flux_viscous[v, nnodes(dg), left_element] - interfaces.u[2, v, interface] = flux_viscous[v, 1, right_element] + # interface in x-direction + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, interface] = flux_viscous[v, nnodes(dg), left_element] + interfaces.u[2, v, interface] = flux_viscous[v, 1, right_element] + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{1}, equations_parabolic, dg::DG, cache_parabolic) - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Get precomputed fluxes at interfaces - flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, - dg, interface) - - # Compute interface flux as mean of left and right viscous fluxes - # TODO: parabolic; only BR1 at the moment - flux = 0.5 * (flux_ll + flux_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, left_direction, left_id] = flux[v] - surface_flux_values[v, right_direction, right_id] = flux[v] - end - end + @unpack neighbor_ids, orientations = cache_parabolic.interfaces - return nothing -end + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 -# This is the version used when calculating the divergence of the viscous fluxes -function prolong2boundaries!(cache_parabolic, flux_viscous, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG, cache) - @unpack boundaries = cache_parabolic - @unpack neighbor_sides = boundaries + # Get 
precomputed fluxes at interfaces + flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, + dg, interface) - @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] + # Compute interface flux as mean of left and right viscous fluxes + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (flux_ll + flux_rr) - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary + # Copy flux to left and right element storage for v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, boundary] = flux_viscous[v, nnodes(dg), element] + surface_flux_values[v, left_direction, left_id] = flux[v] + surface_flux_values[v, right_direction, right_id] = flux[v] end - else # Element in +x direction of boundary - for v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, boundary] = flux_viscous[v, 1, element] - end - end end - return nothing + return nothing end +# This is the version used when calculating the divergence of the viscous fluxes +function prolong2boundaries!(cache_parabolic, flux_viscous, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG, cache) + @unpack boundaries = cache_parabolic + @unpack neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache_parabolic) + element = boundaries.neighbor_ids[boundary] + + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, boundary] = flux_viscous[v, nnodes(dg), element] + end + else # Element in +x direction of boundary + for v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
+ boundaries.u[2, v, boundary] = flux_viscous[v, 1, element] + end + end + end + + return nothing +end function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) - - @threaded for element in eachelement(dg, cache) - for i in eachnode(dg) - # Get solution and gradients - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, element) - gradients_1_node = get_node_vars(gradients, equations_parabolic, dg, i, element) - - # Calculate viscous flux and store each component for later use - flux_viscous_node = flux(u_node, gradients_1_node, 1, equations_parabolic) - set_node_vars!(flux_viscous, flux_viscous_node, equations_parabolic, dg, i, element) + @threaded for element in eachelement(dg, cache) + for i in eachnode(dg) + # Get solution and gradients + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, element) + gradients_1_node = get_node_vars(gradients, equations_parabolic, dg, i, + element) + + # Calculate viscous flux and store each component for later use + flux_viscous_node = flux(u_node, gradients_1_node, 1, equations_parabolic) + set_node_vars!(flux_viscous, flux_viscous_node, equations_parabolic, dg, i, + element) + end end - end end - -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - return nothing +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - return nothing + return nothing end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + 
boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, dg, + cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, dg, + cache, + 2, firsts[2], lasts[2]) end -function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{<:Any,3}, t, +function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{ + <:Any, + 3 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end - # TODO: revisit if we want more general boundary treatments. - # This assumes the gradient numerical flux at the boundary is the gradient variable, - # which is consistent with BR1, LDG. - flux_inner = u_inner + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. 
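The assumption recorded above is easy to miss: when computing gradients, the "numerical flux" at a physical boundary is simply the interior solution value, mirroring the arithmetic mean used at interior interfaces. Distilled into two illustrative one-liners (not Trixi.jl API):

```julia
# BR1/LDG-style gradient fluxes: mean at interior interfaces,
# interior value at a physical boundary
gradient_flux_interface(u_ll, u_rr) = 0.5 * (u_ll + u_rr)
gradient_flux_boundary(u_inner) = u_inner
```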
+ flux_inner = u_inner - x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary) - flux = boundary_condition(flux_inner, u_inner, orientations[boundary], direction, - x, t, Gradient(), equations_parabolic) + x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary) + flux = boundary_condition(flux_inner, u_inner, orientations[boundary], + direction, + x, t, Gradient(), equations_parabolic) - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, direction, neighbor] = flux[v] + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, direction, neighbor] = flux[v] + end end - end - return nothing + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, + dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, + dg, cache, + 2, firsts[2], lasts[2]) end -function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{<:Any,3}, t, +function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{ + <:Any, + 3 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - - # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction") - # of the viscous flux, as computed in `prolong2boundaries!` - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - # Get viscous boundary fluxes - flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - flux_inner = flux_ll - else # Element is on the right, boundary on the left - flux_inner = flux_rr - end + direction, first_boundary, + 
last_boundary)
+    @unpack surface_flux = surface_integral
+
+    # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction")
+    # of the viscous flux, as computed in `prolong2boundaries!`
+    @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries
+
+    @threaded for boundary in first_boundary:last_boundary
+        # Get neighboring element
+        neighbor = neighbor_ids[boundary]
+
+        # Get viscous boundary fluxes
+        flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary)
+        if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right
+            flux_inner = flux_ll
+        else # Element is on the right, boundary on the left
+            flux_inner = flux_rr
+        end
-  x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary)
+        x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary)
-  # TODO: add a field in `cache.boundaries` for gradient information.
-  # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information.
-  # This currently works with Dirichlet/Neumann boundary conditions for LaplaceDiffusion2D and
-  # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27.
-  # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
-  flux = boundary_condition(flux_inner, nothing, orientations[boundary], direction,
-                            x, t, Divergence(), equations_parabolic)
+        # TODO: add a field in `cache.boundaries` for gradient information.
+        # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information.
+        # This currently works with Dirichlet/Neumann boundary conditions for LaplaceDiffusion2D and
+        # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27.
+        # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
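The `Divergence()` pass, by contrast, receives `nothing` in place of `u_inner`, as the comment above explains. A Neumann-style condition that only prescribes the normal boundary flux works naturally in this setting, since it never touches `u_inner`. A minimal sketch, with invented names, matching the call made immediately below:

```julia
using Trixi: Divergence  # assumes Trixi's internal `Divergence` marker type is importable

# Hypothetical Neumann-style condition: prescribe the normal boundary flux
# directly. It ignores `u_inner`, so passing `nothing` (as done below) is safe.
struct MyNeumannBC{F}
    boundary_normal_flux_function::F # (x, t, equations) -> prescribed normal flux
end

function (bc::MyNeumannBC)(flux_inner, u_inner, orientation, direction,
                           x, t, operator_type::Divergence, equations)
    return bc.boundary_normal_flux_function(x, t, equations)
end
```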
+ flux = boundary_condition(flux_inner, nothing, orientations[boundary], + direction, + x, t, Divergence(), equations_parabolic) - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, direction, neighbor] = flux[v] + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, direction, neighbor] = flux[v] + end end - end - return nothing + return nothing end - # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{1}, equations_parabolic, boundary_conditions_parabolic, dg::DG, cache, cache_parabolic) - # Reset du - @trixi_timeit timer() "reset gradients" begin - reset_du!(gradients, dg, cache) - end - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - @unpack derivative_dhat = dg.basis - @threaded for element in eachelement(dg, cache) - - # Calculate volume terms in one element - for i in eachnode(dg) - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, element) + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients, dg, cache) + end - for ii in eachnode(dg) - multiply_add_to_node_vars!(gradients, derivative_dhat[ii, i], u_node, equations_parabolic, dg, ii, element) + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + @unpack derivative_dhat = dg.basis + @threaded for element in eachelement(dg, cache) + + # Calculate volume terms in one element + for i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients, derivative_dhat[ii, i], + u_node, equations_parabolic, dg, ii, + element) + end + end end - end end - end - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - @unpack surface_flux_values = cache_parabolic.elements - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, - equations_parabolic, dg, interface) - flux = 0.5 * (u_ll + u_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, left_direction, left_id] = flux[v] - surface_flux_values[v, right_direction, right_id] = flux[v] - end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!(cache_parabolic, + u_transformed, mesh, + equations_parabolic, + dg.surface_integral, + dg) + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + @unpack surface_flux_values = cache_parabolic.elements + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, 
interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, dg, interface) + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, left_direction, left_id] = flux[v] + surface_flux_values[v, right_direction, right_id] = flux[v] + end + end end - end - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache_parabolic.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for v in eachvariable(equations_parabolic) - # surface at -x - gradients[v, 1, element] = ( - gradients[v, 1, element] - surface_flux_values[v, 1, element] * factor_1) - - # surface at +x - gradients[v, nnodes(dg), element] = ( - gradients[v, nnodes(dg), element] + surface_flux_values[v, 2, element] * factor_2) - end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!(cache_parabolic, + u_transformed, mesh, + equations_parabolic, + dg.surface_integral, + dg) + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!(cache_parabolic, + t, + boundary_conditions_parabolic, + mesh, + equations_parabolic, + dg.surface_integral, + dg) + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache_parabolic.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
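The explicit-assignment trick mentioned in the comment above is easy to verify in isolation: `@muladd` (from MuladdMacro.jl) rewrites `a + b * c` patterns into fused `muladd` calls. A small sketch, assuming standard MuladdMacro behavior (the exact expansion may vary across versions):

```julia
using MuladdMacro

# Core rewriting performed by `@muladd`: an explicit assignment containing
# an `a + b * c` pattern becomes a fused multiply-add call, e.g.
#
#   julia> @macroexpand @muladd a = a + b * c
#   :(a = (muladd)(b, c, a))
#
# In-place operators like `+=`/`-=` are not guaranteed to be rewritten,
# which is why the surface-integral code below spells the update out as
# `gradients[...] = gradients[...] + surface_flux_values[...] * factor`.
@muladd update(a, b, c) = a + b * c
```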
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for v in eachvariable(equations_parabolic) + # surface at -x + gradients[v, 1, element] = (gradients[v, 1, element] - + surface_flux_values[v, 1, element] * + factor_1) + + # surface at +x + gradients[v, nnodes(dg), element] = (gradients[v, nnodes(dg), element] + + surface_flux_values[v, 2, + element] * + factor_2) + end + end end - end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin - apply_jacobian!(gradients, mesh, equations_parabolic, dg, cache_parabolic) - end + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients, mesh, equations_parabolic, dg, + cache_parabolic) + end - return nothing + return nothing end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache_parabolic(mesh::TreeMesh{1}, equations_hyperbolic::AbstractEquations, +function create_cache_parabolic(mesh::TreeMesh{1}, + equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, + uEltype) - n_vars = nvariables(equations_hyperbolic) - n_nodes = nnodes(elements) - n_elements = nelements(elements) - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_elements) - gradients = similar(u_transformed) - flux_viscous = similar(u_transformed) + n_vars = nvariables(equations_hyperbolic) + n_nodes = nnodes(elements) + n_elements = nelements(elements) + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_elements) + gradients = similar(u_transformed) + flux_viscous = similar(u_transformed) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) - return cache + return cache end - # Needed to *not* flip the sign of the inverse Jacobian. # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
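To make the sign convention concrete: bringing the hyperbolic part to the form du/dt = -df/dx folds a minus sign into the Jacobian factor, while the viscous flux g already enters with a plus sign, so no flip is needed here. A schematic 1D comparison (a sketch; the hyperbolic variant shown is paraphrased for contrast and is not part of this hunk):

```julia
# Schematic 1D comparison (not the actual Trixi implementations):
# hyperbolic: du/dt = -J⁻¹ * D f(u)  =>  factor carries a minus sign
apply_jacobian_hyperbolic_sketch!(du, inverse_jacobian) = du .*= -inverse_jacobian

# parabolic: the viscous flux enters as du/dt = +J⁻¹ * D g(u)
#            =>  factor = +inverse_jacobian, no sign flip
apply_jacobian_parabolic_sketch!(du, inverse_jacobian) = du .*= inverse_jacobian
```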
-function apply_jacobian!(du, mesh::TreeMesh{1}, - equations::AbstractEquationsParabolic, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] +function apply_jacobian_parabolic!(du, mesh::TreeMesh{1}, + equations::AbstractEquationsParabolic, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = cache.elements.inverse_jacobian[element] - for i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, element] *= factor - end + for i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, element] *= factor + end + end end - end - return nothing + return nothing end - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 3ce9f611a8b..d3227710686 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # everything related to a DG semidiscretization in 2D, # currently limited to Lobatto-Legendre nodes @@ -13,318 +13,353 @@ # the RHS etc. function create_cache(mesh::TreeMesh{2}, equations, dg::DG, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, mortars) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end - # The methods below are specialized on the volume integral type # and called from the basic `create_cache` method at the top. 
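The pattern referred to here is plain Julia multiple dispatch: the generic `create_cache` above calls out with the concrete volume-integral type, and each specialization contributes its own NamedTuple of work arrays, which are merged via `(; cache..., extra...)`. A stripped-down sketch of the mechanism, with types and fields invented for illustration:

```julia
# Toy version of the cache-merging dispatch pattern used in this file.
struct WeakForm end
struct ShockCapturing end

create_cache_demo(::WeakForm, n) = NamedTuple()
create_cache_demo(::ShockCapturing, n) = (; element_ids_dg = Int[],
                                            blending = zeros(n))

function build_cache(volume_integral, n)
    cache = (; n)                                   # basic entries
    extra = create_cache_demo(volume_integral, n)   # specialized entries
    return (; cache..., extra...)                   # merged NamedTuple
end

build_cache(ShockCapturing(), 4)  # (n = 4, element_ids_dg = Int64[], blending = [0.0, ...])
```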
-function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, - equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype) - NamedTuple() +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, + equations, volume_integral::VolumeIntegralFluxDifferencing, + dg::DG, uEltype) + NamedTuple() end - -function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] - - cache = create_cache(mesh, equations, - VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), - dg, uEltype) - - A3dp1_x = Array{uEltype, 3} - A3dp1_y = Array{uEltype, 3} - - fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - - return (; cache..., element_ids_dg, element_ids_dgfv, - fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded) + element_ids_dg = Int[] + element_ids_dgfv = Int[] + + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype) + + A3dp1_x = Array{uEltype, 3} + A3dp1_y = Array{uEltype, 3} + + fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + + return (; cache..., element_ids_dg, element_ids_dgfv, + fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded) end - -function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, uEltype) - - A3dp1_x = Array{uEltype, 3} - A3dp1_y = Array{uEltype, 3} - - fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - - return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded) +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, + volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, + uEltype) + A3dp1_x = Array{uEltype, 3} + A3dp1_y = Array{uEltype, 3} 
+ + fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + + return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, + fstar2_R_threaded) end # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. -function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal performance using different types - MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, uEltype, 2, nvariables(equations) * nnodes(mortar_l2)} - fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + # TODO: Taal performance using different types + MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, uEltype, 2, + nvariables(equations) * nnodes(mortar_l2)} + fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - # A2d = Array{uEltype, 2} - # fstar_upper_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] - # fstar_lower_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] + # A2d = Array{uEltype, 2} + # fstar_upper_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] + # fstar_lower_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] - (; fstar_upper_threaded, fstar_lower_threaded) + (; fstar_upper_threaded, fstar_lower_threaded) end - # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
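For readers unfamiliar with `@trixi_timeit timer() "label" expr`, used throughout `rhs!` below: it instruments a block with a named timer, similar in spirit to TimerOutputs.jl's `@timeit` (Trixi ships its own macro, which can also compile the instrumentation away). A rough standalone sketch using TimerOutputs directly:

```julia
using TimerOutputs

const to = TimerOutput()

function rhs_demo!(du)
    @timeit to "reset ∂u/∂t" fill!(du, 0)
    @timeit to "volume integral" begin
        du .+= 1  # stand-in for the actual kernel
    end
    return nothing
end

rhs_demo!(zeros(8))
show(to)  # prints a table of the accumulated timings per label
```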
function rhs!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, 
u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + weak_form_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + dg, cache) + end - @threaded for element in eachelement(dg, cache) - weak_form_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - dg, cache) - end - - return nothing + return nothing end @inline function weak_form_kernel!(du, u, element, mesh::TreeMesh{2}, nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_dhat = dg.basis - - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - flux1 = flux(u_node, 1, equations) - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, equations, dg, ii, j, element) - end + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_dhat = dg.basis - flux2 = flux(u_node, 2, equations) - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, equations, dg, i, jj, element) + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + flux1 = flux(u_node, 1, equations) + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, + equations, dg, ii, j, element) + end + + flux2 = flux(u_node, 2, equations) + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, + equations, dg, i, jj, element) + end end - end - return nothing + return nothing end - # flux differencing volume integral. 
For curved meshes averaging of the # mapping terms, stored in `cache.elements.contravariant_vectors`, is peeled apart # from the evaluation of the physical fluxes in each Cartesian direction function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end + @threaded for element in eachelement(dg, cache) + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_integral.volume_flux, dg, cache) + end end @inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - - # Calculate volume integral in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. - - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - flux1 = volume_flux(u_node, u_node_ii, 1, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, equations, dg, ii, j, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + + # Calculate volume integral in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
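The saving claimed in this comment is easy to quantify: with N nodes per line, a naive double loop would evaluate the two-point volume flux N² times per line, whereas looping only over ii > i (the diagonal contributes nothing) needs N(N-1)/2 evaluations, each result being scattered to both partner nodes. A minimal 1D sketch of the same pattern, with a toy flux and plain arrays rather than the Trixi kernel:

```julia
# 1D toy of symmetric flux differencing: exploit fstar(a, b) == fstar(b, a)
# to evaluate each node pair only once, updating both partners.
fstar(a, b) = 0.5 * (a + b)  # toy symmetric two-point flux

function flux_differencing_1d!(du, u, D)  # D: split-form derivative matrix
    n = length(u)
    for i in 1:n, ii in (i + 1):n   # strictly upper triangle: N(N-1)/2 pairs
        f = fstar(u[i], u[ii])
        du[i] += D[i, ii] * f       # contribution to node i
        du[ii] += D[ii, i] * f      # reuse the same flux for node ii
    end
    return du
end

u = [1.0, 2.0, 3.0]
D = [0.0 1.0 -0.5; -1.0 0.0 1.0; 0.5 -1.0 0.0]  # zero diagonal, like derivative_split
flux_differencing_1d!(zeros(3), u, D)
```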
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + flux1 = volume_flux(u_node, u_node_ii, 1, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, + equations, dg, ii, j, element) + end - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - flux2 = volume_flux(u_node, u_node_jj, 2, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, equations, dg, i, jj, element) + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + flux2 = volume_flux(u_node, u_node_jj, 2, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, + equations, dg, i, jj, element) + end end - end end @inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * noncons_flux1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * noncons_flux2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, element) - end -end + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. 
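The reason the nonconservative terms cannot reuse the pair-symmetry trick of the conservative kernel is that `nonconservative_flux(u_ll, u_rr, ...)` is generally not symmetric in its arguments; both orderings carry information, so the loops below must visit every ii (and jj). A toy illustration with an invented nonsymmetric two-point term:

```julia
# Toy nonconservative two-point term: it depends on the *local* state u_ll
# and the jump to the neighbor, hence g(a, b) != g(b, a) in general.
g_noncons(u_ll, u_rr) = u_ll * (u_rr - u_ll)

g_noncons(1.0, 2.0)  #  1.0
g_noncons(2.0, 1.0)  # -2.0  -> no symmetry to exploit
```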
+ + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * noncons_flux1 + end + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * noncons_flux2 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, element) + end +end # TODO: Taal dimension agnostic function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache - @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end + @unpack element_ids_dg, element_ids_dgfv = cache + @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral + + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) + + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) + + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache) + end - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] + # Loop over blended DG-FV elements + @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) - end + # Calculate FV volume integral contribution + fv_kernel!(du, 
u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end - return nothing + return nothing end # TODO: Taal dimension agnostic @@ -333,47 +368,47 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache) - @unpack volume_flux_fv = volume_integral + @unpack volume_flux_fv = volume_integral - # Calculate LGL FV volume integral - @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) - end + # Calculate LGL FV volume integral + @threaded for element in eachelement(dg, cache) + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, true) + end - return nothing + return nothing end - @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, - volume_flux_fv, dg::DGSEM, cache, element, alpha=true) - @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache - @unpack inverse_weights = dg.basis - - # Calculate FV two-point fluxes - fstar1_L = fstar1_L_threaded[Threads.threadid()] - fstar2_L = fstar2_L_threaded[Threads.threadid()] - fstar1_R = fstar1_R_threaded[Threads.threadid()] - fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, - nonconservative_terms, equations, volume_flux_fv, dg, element, cache) - - # Calculate FV volume integral contribution - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, element] += ( alpha * - (inverse_weights[i] * (fstar1_L[v, i+1, j] - fstar1_R[v, i, j]) + - inverse_weights[j] * (fstar2_L[v, i, j+1] - fstar2_R[v, i, j])) ) + volume_flux_fv, dg::DGSEM, cache, element, alpha = true) + @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache + @unpack inverse_weights = dg.basis + + # Calculate FV two-point fluxes + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar2_L = fstar2_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + fstar2_R = fstar2_R_threaded[Threads.threadid()] + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + nonconservative_terms, equations, volume_flux_fv, dg, element, cache) + + # Calculate FV volume integral contribution + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, element] += (alpha * + (inverse_weights[i] * + (fstar1_L[v, i + 1, j] - fstar1_R[v, i, j]) + + inverse_weights[j] * + (fstar2_L[v, i, j + 1] - fstar2_R[v, i, j]))) + end end - end - return nothing + return nothing end - - # calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u_leftright, # nonconservative_terms::False, equations, # volume_flux_fv, dg, element) @@ -385,37 +420,38 @@ end # - `fstar1_R::AbstractArray{<:Real, 3}` # - `fstar2_L::AbstractArray{<:Real, 3}` # - `fstar2_R::AbstractArray{<:Real, 3}` -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u::AbstractArray{<:Any,4}, - mesh::TreeMesh{2}, nonconservative_terms::False, equations, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, + u::AbstractArray{<:Any, 4}, + mesh::TreeMesh{2}, nonconservative_terms::False, + equations, volume_flux_fv, dg::DGSEM, element, cache) + fstar1_L[:, 1, 
:] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) + + for j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction + set_node_vars!(fstar1_L, flux, equations, dg, i, j) + set_node_vars!(fstar1_R, flux, equations, dg, i, j) + end + + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) + + for j in 2:nnodes(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction + set_node_vars!(fstar2_L, flux, equations, dg, i, j) + set_node_vars!(fstar2_R, flux, equations, dg, i, j) + end - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) - - for j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction - set_node_vars!(fstar1_L, flux, equations, dg, i, j) - set_node_vars!(fstar1_R, flux, equations, dg, i, j) - end - - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - for j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction - set_node_vars!(fstar2_L, flux, equations, dg, i, j) - set_node_vars!(fstar2_R, flux, equations, dg, i, j) - end - - return nothing + return nothing end # calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u_leftright, @@ -430,423 +466,449 @@ end # - `fstar2_L::AbstractArray{<:Real, 3}`: # - `fstar2_R::AbstractArray{<:Real, 3}`: # - `u_leftright::AbstractArray{<:Real, 4}` -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u::AbstractArray{<:Any,4}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, + u::AbstractArray{<:Any, 4}, mesh::TreeMesh{2}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - volume_flux, nonconservative_flux = volume_flux_fv - - # Fluxes in x - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) - - for j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - # Compute conservative part - f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - f1_L 
= f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) - f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) - - # Copy to temporary storage - set_node_vars!(fstar1_L, f1_L, equations, dg, i, j) - set_node_vars!(fstar1_R, f1_R, equations, dg, i, j) - end - - # Fluxes in y - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - # Compute inner fluxes - for j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - # Compute conservative part - f2 = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - f2_L = f2 + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) - f2_R = f2 + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) - - # Copy to temporary storage - set_node_vars!(fstar2_L, f2_L, equations, dg, i, j) - set_node_vars!(fstar2_R, f2_R, equations, dg, i, j) - end - - return nothing -end + volume_flux, nonconservative_flux = volume_flux_fv + + # Fluxes in x + fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) + + for j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + # Compute conservative part + f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + f1_L = f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) + f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) + + # Copy to temporary storage + set_node_vars!(fstar1_L, f1_L, equations, dg, i, j) + set_node_vars!(fstar1_R, f1_R, equations, dg, i, j) + end + + # Fluxes in y + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) + + # Compute inner fluxes + for j in 2:nnodes(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + # Compute conservative part + f2 = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + f2_L = f2 + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) + f2_R = f2 + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) + + # Copy to temporary storage + set_node_vars!(fstar2_L, f2_L, equations, dg, i, j) + set_node_vars!(fstar2_R, f2_R, equations, dg, i, j) + end + + return nothing +end function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - @unpack orientations = interfaces - - @threaded for interface 
in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for j in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, left_element] - interfaces.u[2, v, j, interface] = u[v, 1, j, right_element] - end - else # if orientations[interface] == 2 - # interface in y-direction - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), left_element] - interfaces.u[2, v, i, interface] = u[v, i, 1, right_element] - end + @unpack interfaces = cache + @unpack orientations = interfaces + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for j in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, left_element] + interfaces.u[2, v, j, interface] = u[v, 1, j, right_element] + end + else # if orientations[interface] == 2 + # interface in y-direction + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), left_element] + interfaces.u[2, v, i, interface] = u[v, i, 1, right_element] + end + end end - end - return nothing + return nothing end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, left_direction, left_id] = flux[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] - end + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, left_direction, left_id] = flux[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + end + end end - end - return nothing + return nothing end function 
calc_interface_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Call pointwise Riemann solver - orientation = orientations[interface] - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Compute both nonconservative fluxes - noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) - noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, left_direction, left_id] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] + 0.5 * noncons_right[v] - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Call pointwise Riemann solver + orientation = orientations[interface] + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) + + # Compute both nonconservative fluxes + noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) + noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, left_direction, left_id] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + + 0.5 * + noncons_right[v] + end + end end - end - return nothing + return nothing end - function prolong2boundaries!(cache, u, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack orientations, neighbor_sides = boundaries - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, l, boundary] = u[v, nnodes(dg), l, element] - end - else # Element in 
+x direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, l, boundary] = u[v, 1, l, element] - end - end - else # if orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, l, boundary] = u[v, l, nnodes(dg), element] + @unpack boundaries = cache + @unpack orientations, neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, l, boundary] = u[v, nnodes(dg), l, element] + end + else # Element in +x direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, l, boundary] = u[v, 1, l, element] + end + end + else # if orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, l, boundary] = u[v, l, nnodes(dg), element] + end + else + # element in +y direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, l, boundary] = u[v, l, 1, element] + end + end end - else - # element in +y direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, l, boundary] = u[v, l, 1, element] - end - end end - end - return nothing + return nothing end # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert isempty(eachboundary(dg, cache)) end function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, 
+ 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + 4, firsts[4], lasts[4]) end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,4}, t, - boundary_condition, nonconservative_terms::False, equations, - surface_integral ,dg::DG, cache, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 4}, + t, + boundary_condition, + nonconservative_terms::False, equations, + surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, i, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, direction, neighbor] = flux[v] - end + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, i, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,4}, t, - boundary_condition, nonconservative_terms::True, equations, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 4}, + t, + boundary_condition, + nonconservative_terms::True, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get boundary 
flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, i, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - noncons_flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, nonconservative_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, direction, neighbor] = flux[v] + 0.5 * noncons_flux[v] - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, i, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + noncons_flux = boundary_condition(u_inner, orientations[boundary], + direction, x, t, nonconservative_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, direction, neighbor] = flux[v] + + 0.5 * noncons_flux[v] + end + end end - end - return nothing + return nothing end - function prolong2mortars!(cache, u, mesh::TreeMesh{2}, equations, - mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - - @threaded for mortar in eachmortar(dg, cache) - - large_element = cache.mortars.neighbor_ids[3, mortar] - upper_element = cache.mortars.neighbor_ids[2, mortar] - lower_element = cache.mortars.neighbor_ids[1, mortar] - - # Copy solution small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[2, v, l, mortar] = u[v, 1, l, upper_element] - cache.mortars.u_lower[2, v, l, mortar] = u[v, 1, l, lower_element] - end - end - else - # L2 mortars in y-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[2, v, l, mortar] = u[v, l, 1, upper_element] - cache.mortars.u_lower[2, v, l, mortar] = u[v, l, 1, lower_element] - end - end - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[1, v, l, mortar] = u[v, nnodes(dg), l, upper_element] - cache.mortars.u_lower[1, v, l, mortar] = u[v, nnodes(dg), l, lower_element] - end + mortar_l2::LobattoLegendreMortarL2, surface_integral, + dg::DGSEM) + @threaded for mortar in eachmortar(dg, cache) + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy solution small to small + if cache.mortars.large_sides[mortar] == 1 
# -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[2, v, l, mortar] = u[v, 1, l, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = u[v, 1, l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[2, v, l, mortar] = u[v, l, 1, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = u[v, l, 1, + lower_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[1, v, l, mortar] = u[v, nnodes(dg), l, + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = u[v, nnodes(dg), l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[1, v, l, mortar] = u[v, l, nnodes(dg), + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = u[v, l, nnodes(dg), + lower_element] + end + end + end end - else - # L2 mortars in y-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[1, v, l, mortar] = u[v, l, nnodes(dg), upper_element] - cache.mortars.u_lower[1, v, l, mortar] = u[v, l, nnodes(dg), lower_element] - end - end - end - end - # Interpolate large element face data to small interface locations - if cache.mortars.large_sides[mortar] == 1 # -> large element on left side - leftright = 1 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, nnodes(dg), :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, nnodes(dg), large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - end - else # large_sides[mortar] == 2 -> large element on right side - leftright = 2 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, 1, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, 1, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - end + # Interpolate large element face data to small interface locations + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, nnodes(dg), :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, nnodes(dg), large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, 1, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + end end - 
end - return nothing + return nothing end -@inline function element_solutions_to_mortars!(mortars, mortar_l2::LobattoLegendreMortarL2, leftright, mortar, - u_large::AbstractArray{<:Any,2}) - multiply_dimensionwise!(view(mortars.u_upper, leftright, :, :, mortar), mortar_l2.forward_upper, u_large) - multiply_dimensionwise!(view(mortars.u_lower, leftright, :, :, mortar), mortar_l2.forward_lower, u_large) - return nothing +@inline function element_solutions_to_mortars!(mortars, + mortar_l2::LobattoLegendreMortarL2, + leftright, mortar, + u_large::AbstractArray{<:Any, 2}) + multiply_dimensionwise!(view(mortars.u_upper, leftright, :, :, mortar), + mortar_l2.forward_upper, u_large) + multiply_dimensionwise!(view(mortars.u_lower, leftright, :, :, mortar), + mortar_l2.forward_lower, u_large) + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::False, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u_lower, u_upper, orientations = cache.mortars - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper = fstar_upper_threaded[Threads.threadid()] - fstar_lower = fstar_lower_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, orientation) - calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, orientation) - - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar_upper, fstar_lower) - end - - return nothing + @unpack surface_flux = surface_integral + @unpack u_lower, u_upper, orientations = cache.mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, + orientation) + + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end + + return nothing end function calc_mortar_flux!(surface_flux_values, @@ -854,76 +916,88 @@ function calc_mortar_flux!(surface_flux_values, nonconservative_terms::True, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u_lower, u_upper, orientations, large_sides = cache.mortars - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper = fstar_upper_threaded[Threads.threadid()] - fstar_lower = fstar_lower_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, orientation) - calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, orientation) - - # Add nonconservative fluxes. 
- # These need to be adapted on the geometry (left/right) since the order of - # the arguments matters, based on the global SBP operator interpretation. - # The same interpretation (global SBP operators coupled discontinuously via - # central fluxes/SATs) explains why we need the factor 0.5. - # Alternatively, you can also follow the argumentation of Bohm et al. 2018 - # ("nonconservative diamond flux") - if large_sides[mortar] == 1 # -> small elements on right side - for i in eachnode(dg) - # Pull the left and right solutions - u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, i, mortar) - u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, i, mortar) - # Call pointwise nonconservative term - noncons_upper = nonconservative_flux(u_upper_ll, u_upper_rr, orientation, equations) - noncons_lower = nonconservative_flux(u_lower_ll, u_lower_rr, orientation, equations) - # Add to primary and secondary temporary storage - multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, dg, i) - multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, dg, i) - end - else # large_sides[mortar] == 2 -> small elements on the left - for i in eachnode(dg) - # Pull the left and right solutions - u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, i, mortar) - u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, i, mortar) - # Call pointwise nonconservative term - noncons_upper = nonconservative_flux(u_upper_rr, u_upper_ll, orientation, equations) - noncons_lower = nonconservative_flux(u_lower_rr, u_lower_ll, orientation, equations) - # Add to primary and secondary temporary storage - multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, dg, i) - multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, dg, i) - end - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u_lower, u_upper, orientations, large_sides = cache.mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, + orientation) + + # Add nonconservative fluxes. + # These need to be adapted on the geometry (left/right) since the order of + # the arguments matters, based on the global SBP operator interpretation. + # The same interpretation (global SBP operators coupled discontinuously via + # central fluxes/SATs) explains why we need the factor 0.5. + # Alternatively, you can also follow the argumentation of Bohm et al. 
2018 + # ("nonconservative diamond flux") + if large_sides[mortar] == 1 # -> small elements on right side + for i in eachnode(dg) + # Pull the left and right solutions + u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, + i, mortar) + u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, + i, mortar) + # Call pointwise nonconservative term + noncons_upper = nonconservative_flux(u_upper_ll, u_upper_rr, + orientation, equations) + noncons_lower = nonconservative_flux(u_lower_ll, u_lower_rr, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, + dg, i) + multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, + dg, i) + end + else # large_sides[mortar] == 2 -> small elements on the left + for i in eachnode(dg) + # Pull the left and right solutions + u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, + i, mortar) + u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, + i, mortar) + # Call pointwise nonconservative term + noncons_upper = nonconservative_flux(u_upper_rr, u_upper_ll, + orientation, equations) + noncons_lower = nonconservative_flux(u_lower_rr, u_lower_ll, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, + dg, i) + multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, + dg, i) + end + end - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar_upper, fstar_lower) - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end - return nothing + return nothing end - -@inline function calc_fstar!(destination::AbstractArray{<:Any,2}, equations, +@inline function calc_fstar!(destination::AbstractArray{<:Any, 2}, equations, surface_flux, dg::DGSEM, u_interfaces, interface, orientation) + for i in eachnode(dg) + # Call pointwise two-point numerical flux function + u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) - for i in eachnode(dg) - # Call pointwise two-point numerical flux function - u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Copy flux to left and right element storage - set_node_vars!(destination, flux, equations, dg, i) - end + # Copy flux to left and right element storage + set_node_vars!(destination, flux, equations, dg, i) + end - return nothing + return nothing end @inline function mortar_fluxes_to_elements!(surface_flux_values, @@ -931,150 +1005,148 @@ end mortar_l2::LobattoLegendreMortarL2, dg::DGSEM, cache, mortar, fstar_upper, fstar_lower) - large_element = cache.mortars.neighbor_ids[3, mortar] - upper_element = cache.mortars.neighbor_ids[2, mortar] - lower_element = cache.mortars.neighbor_ids[1, mortar] - - # Copy flux small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 - 
end - end - surface_flux_values[:, :, direction, upper_element] .= fstar_upper - surface_flux_values[:, :, direction, lower_element] .= fstar_lower - - # Project small fluxes to large element - if cache.mortars.large_sides[mortar] == 1 # -> large element on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy flux small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end end - else # large_sides[mortar] == 2 -> large element on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 + surface_flux_values[:, :, direction, upper_element] .= fstar_upper + surface_flux_values[:, :, direction, lower_element] .= fstar_lower + + # Project small fluxes to large element + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + else # large_sides[mortar] == 2 -> large element on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end end - end - - # TODO: Taal performance - # for v in eachvariable(equations) - # # The code below is semantically equivalent to - # # surface_flux_values[v, :, direction, large_element] .= - # # (mortar_l2.reverse_upper * fstar_upper[v, :] + mortar_l2.reverse_lower * fstar_lower[v, :]) - # # but faster and does not allocate. - # # Note that `true * some_float == some_float` in Julia, i.e. `true` acts as - # # a universal `one`. Hence, the second `mul!` means "add the matrix-vector - # # product to the current value of the destination". - # @views mul!(surface_flux_values[v, :, direction, large_element], - # mortar_l2.reverse_upper, fstar_upper[v, :]) - # @views mul!(surface_flux_values[v, :, direction, large_element], - # mortar_l2.reverse_lower, fstar_lower[v, :], true, true) - # end - # The code above could be replaced by the following code. However, the relative efficiency - # depends on the types of fstar_upper/fstar_lower and dg.l2mortar_reverse_upper. - # Using StaticArrays for both makes the code above faster for common test cases. - multiply_dimensionwise!( - view(surface_flux_values, :, :, direction, large_element), mortar_l2.reverse_upper, fstar_upper, - mortar_l2.reverse_lower, fstar_lower) - - return nothing -end + # TODO: Taal performance + # for v in eachvariable(equations) + # # The code below is semantically equivalent to + # # surface_flux_values[v, :, direction, large_element] .= + # # (mortar_l2.reverse_upper * fstar_upper[v, :] + mortar_l2.reverse_lower * fstar_lower[v, :]) + # # but faster and does not allocate. + # # Note that `true * some_float == some_float` in Julia, i.e. `true` acts as + # # a universal `one`. 
Hence, the second `mul!` means "add the matrix-vector + # # product to the current value of the destination". + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_upper, fstar_upper[v, :]) + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_lower, fstar_lower[v, :], true, true) + # end + # The code above could be replaced by the following code. However, the relative efficiency + # depends on the types of fstar_upper/fstar_lower and dg.l2mortar_reverse_upper. + # Using StaticArrays for both makes the code above faster for common test cases. + multiply_dimensionwise!(view(surface_flux_values, :, :, direction, large_element), + mortar_l2.reverse_upper, fstar_upper, + mortar_l2.reverse_lower, fstar_lower) + + return nothing +end function calc_surface_integral!(du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DG, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = ( - du[v, 1, l, element] - surface_flux_values[v, l, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, element] = ( - du[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, element] = ( - du[v, l, 1, element] - surface_flux_values[v, l, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), element] = ( - du[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
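    # (Editorial note, not part of the diff) `MuladdMacro.@muladd` rewrites explicit
    # assignments of the form `a = a + b * c` into `a = muladd(b, c, a)`, which maps
    # to a single fused multiply-add instruction. A minimal sketch:
    #
    #     using MuladdMacro
    #     @muladd fma_example(x, y, z) = x + y * z   # lowers to muladd(y, z, x)
    #
    # As the comment above notes, the `+=` form is avoided in this kernel because
    # only explicit assignments are reliably rewritten by the macro.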
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] - + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] - + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end end - end - return nothing + return nothing end - function apply_jacobian!(du, mesh::TreeMesh{2}, equations, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = -cache.elements.inverse_jacobian[element] - @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] - - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquations{2}, dg::DG, cache) - return nothing + return nothing end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{2}, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) - du_local = source_terms(u_local, x_local, t, equations) - add_to_node_vars!(du, du_local, equations, dg, i, j, element) + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + j, element) + du_local = source_terms(u_local, x_local, t, equations) + add_to_node_vars!(du, du_local, equations, dg, i, j, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl index 9d6e3c452b0..50b1e8cb5b4 100644 --- a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl @@ -3,51 +3,50 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Calculate the vorticity on a single node using the derivative matrix from the polynomial basis of # a DGSEM solver. `u` is the solution on the whole domain. # This function is used for calculating acoustic source terms for coupled Euler-acoustics # simulations. 
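The vorticity computation that follows applies the basis's 1D derivative matrix along each coordinate direction, i.e. the discrete curl (v2)_x - (v1)_y scaled by the inverse Jacobian. A minimal, self-contained sketch with a toy 2-node linear basis (all names below are hypothetical stand-ins for `dg.basis` and `cache` data, not Trixi API):

    # Toy sketch: vorticity = dv2/dx - dv1/dy on one element via a derivative matrix D
    D  = [-0.5 0.5; -0.5 0.5]   # derivative matrix of the linear 2-node Lagrange basis on [-1, 1]
    v1 = [0.0 1.0; 0.0 1.0]     # v1 grows in y, so dv1/dy == 0.5 in reference coordinates
    v2 = zeros(2, 2)            # v2 is constant, so dv2/dx == 0
    inv_jacobian = 1.0          # reference-to-physical scaling of the element
    vorticity = [sum(D[i, k] * v2[k, j] for k in 1:2) -
                 sum(D[j, k] * v1[i, k] for k in 1:2)
                 for i in 1:2, j in 1:2] .* inv_jacobian   # == fill(-0.5, 2, 2)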
-function calc_vorticity_node(u, mesh::TreeMesh{2}, equations::CompressibleEulerEquations2D, +function calc_vorticity_node(u, mesh::TreeMesh{2}, + equations::CompressibleEulerEquations2D, dg::DGSEM, cache, i, j, element) - @unpack derivative_matrix = dg.basis - - v2_x = zero(eltype(u)) # derivative of v2 in x direction - for ii in eachnode(dg) - rho, _, rho_v2 = get_node_vars(u, equations, dg, ii, j, element) - v2 = rho_v2 / rho - v2_x = v2_x + derivative_matrix[i, ii] * v2 - end - - v1_y = zero(eltype(u)) # derivative of v1 in y direction - for jj in eachnode(dg) - rho, rho_v1 = get_node_vars(u, equations, dg, i, jj, element) - v1 = rho_v1 / rho - v1_y = v1_y + derivative_matrix[j, jj] * v1 - end - - return (v2_x - v1_y) * cache.elements.inverse_jacobian[element] + @unpack derivative_matrix = dg.basis + + v2_x = zero(eltype(u)) # derivative of v2 in x direction + for ii in eachnode(dg) + rho, _, rho_v2 = get_node_vars(u, equations, dg, ii, j, element) + v2 = rho_v2 / rho + v2_x = v2_x + derivative_matrix[i, ii] * v2 + end + + v1_y = zero(eltype(u)) # derivative of v1 in y direction + for jj in eachnode(dg) + rho, rho_v1 = get_node_vars(u, equations, dg, i, jj, element) + v1 = rho_v1 / rho + v1_y = v1_y + derivative_matrix[j, jj] * v1 + end + + return (v2_x - v1_y) * cache.elements.inverse_jacobian[element] end # Convenience function for calculating the vorticity on the whole domain and storing it in a # preallocated array -function calc_vorticity!(vorticity, u, mesh::TreeMesh{2}, equations::CompressibleEulerEquations2D, +function calc_vorticity!(vorticity, u, mesh::TreeMesh{2}, + equations::CompressibleEulerEquations2D, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - vorticity[i, j, element] = calc_vorticity_node(u, mesh, equations, dg, cache, i, j, element) + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + vorticity[i, j, element] = calc_vorticity_node(u, mesh, equations, dg, + cache, i, j, element) + end end - end - return nothing + return nothing end - - end # muladd - - # From here on, this file contains specializations of DG methods on the # TreeMesh2D to the compressible Euler equations. # @@ -62,7 +61,6 @@ end # muladd # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @@ -72,161 +70,161 @@ end # muladd equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. 
- u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. 
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p + end + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] end - end - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. 
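    # (Editorial sketch, hypothetical names) The triangular loops below exploit the
    # symmetry f(u_i, u_k) == f(u_k, u_i) of the two-point volume flux: each flux is
    # computed once and scattered twice with the two transposed entries of the split
    # derivative matrix,
    #
    #     for i in 1:n, k in (i + 1):n
    #         f = volume_flux(u[i], u[k])        # symmetric two-point flux
    #         du[i] += alpha * D_split[i, k] * f
    #         du[k] += alpha * D_split[k, i] * f
    #     end
    #
    # halving the number of flux evaluations relative to a full double loop, while
    # the plain inner loop over `j` remains SIMD-friendly.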
+ for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end end - end + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end -end + du[i, j, v] = du_permuted[j, i, v] + end + + # y direction + # The memory layout is already optimal for SIMD vectorization in this loop. + for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end + end + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. 
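    # (Editorial note) Accumulating into the stack-allocated temporary `du[i, j, v]`
    # and adding it to the global array once at the end keeps the hot loops above
    # free of scattered writes to `_du[v, i, j, element]`, whose variable-first
    # index order is unfavorable for SIMD vectorization.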
+ @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + _du[v, i, j, element] += du[i, j, v] + end +end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, mesh::TreeMesh{2}, @@ -234,225 +232,231 @@ end equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - u_prim[i, j, 5] = log(rho) - u_prim[i, j, 6] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - log_rho_ll = u_prim_permuted[j, i, 5] - log_p_ll = u_prim_permuted[j, i, 6] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - log_rho_rr = u_prim_permuted[j, ii, 5] - log_p_rr = u_prim_permuted[j, ii, 6] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p + u_prim[i, j, 5] = log(rho) + u_prim[i, j, 6] = log(p) + end + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). 
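    # (Editorial sketch) The branch-free logarithmic mean inlined in the kernels
    # below corresponds to the following standalone function, where `1.0e-4` is the
    # tolerance for switching between the series expansion and the direct formula:
    #
    #     function ln_mean_sketch(x, y)
    #         z = (y - x)^2 / (x + y)^2
    #         if z < 1.0e-4
    #             # truncated series of (y - x) / (log(y) - log(x)) around x == y
    #             return (x + y) / (2 + z * (2 / 3 + z * (2 / 5 + 2 / 7 * z)))
    #         else
    #             return (y - x) / (log(y) - log(x))
    #         end
    #     end
    #
    # The kernels use `ifelse` instead of `if` so that LoopVectorization.jl can
    # evaluate both paths branch-free inside `@turbo` loops.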
+ du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] + end + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + log_rho_ll = u_prim_permuted[j, i, 5] + log_p_ll = u_prim_permuted[j, i, 6] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + log_rho_rr = u_prim_permuted[j, ii, 5] + log_p_rr = u_prim_permuted[j, ii, 6] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end + end + + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + du[i, j, v] = du_permuted[j, i, v] end - end - - @turbo for v in 
eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - log_rho_ll = u_prim[i, j, 5] - log_p_ll = u_prim[i, j, 6] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - log_rho_rr = u_prim[i, jj, 5] - log_p_rr = u_prim[i, jj, 6] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + + # y direction + # The memory layout is already optimal for SIMD vectorization in this loop. + for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + log_rho_ll = u_prim[i, j, 5] + log_p_ll = u_prim[i, j, 6] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + log_rho_rr = u_prim[i, jj, 5] + log_p_rr = u_prim[i, jj, 6] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end end - end + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end + _du[v, i, j, element] += du[i, j, v] + end end diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index ca6394172ad..c5862579992 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This file collects all methods that have been updated to work with parabolic systems of equations # @@ -12,594 +13,693 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). 
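As a toy illustration of these three steps (a 1D periodic finite-difference sketch with made-up operators, not Trixi API), the parabolic update has the structure:

    # 1D periodic sketch: gradient -> viscous flux -> divergence
    n  = 64
    xs = range(0, 2pi; length = n + 1)[1:n]
    dx = step(xs)
    ddx(w) = (circshift(w, -1) .- circshift(w, 1)) ./ (2 * dx)  # central difference

    u  = sin.(xs)
    g  = ddx(u)        # step 1: grad(u)
    f  = 0.01 .* g     # step 2: f(u, grad(u)); here a linear diffusive flux mu * grad(u)
    du = ddx(f)        # step 3: div(f(u, grad(u))), the "regular" rhs! applied to the fluxes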
-function rhs_parabolic!(du, u, t, mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, +function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, + equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) - @unpack u_transformed, gradients, flux_viscous = cache_parabolic - - # Convert conservative variables to a form more suitable for viscous flux calculations - @trixi_timeit timer() "transform variables" transform_variables!( - u_transformed, u, mesh, equations_parabolic, dg, parabolic_scheme, cache, cache_parabolic) - - # Compute the gradients of the transformed variables - @trixi_timeit timer() "calculate gradient" calc_gradient!( - gradients, u_transformed, t, mesh, equations_parabolic, boundary_conditions_parabolic, dg, - cache, cache_parabolic) - - # Compute and store the viscous fluxes - @trixi_timeit timer() "calculate viscous fluxes" calc_viscous_fluxes!( - flux_viscous, gradients, u_transformed, mesh, equations_parabolic, dg, cache, cache_parabolic) - - # The remainder of this function is essentially a regular rhs! for parabolic equations (i.e., it - # computes the divergence of the viscous fluxes) - # - # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have - # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the - # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the - # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it - # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* - # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we - # do not need to recreate the existing data structure only with a different name, and c) we do not - # need to interpolate solutions *and* gradients to the surfaces. 
- - # TODO: parabolic; reconsider current data structure reuse strategy - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, flux_viscous, mesh, equations_parabolic, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache_parabolic.elements.surface_flux_values, mesh, equations_parabolic, dg, cache_parabolic) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_divergence!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; extend to mortars - @assert nmortars(dg, cache) == 0 - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations_parabolic, dg, cache_parabolic) - - return nothing + (; u_transformed, gradients, flux_viscous) = cache_parabolic + + # Convert conservative variables to a form more suitable for viscous flux calculations + @trixi_timeit timer() "transform variables" begin + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, parabolic_scheme, cache, cache_parabolic) + end + + # Compute the gradients of the transformed variables + @trixi_timeit timer() "calculate gradient" begin + calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, + boundary_conditions_parabolic, dg, cache, cache_parabolic) + end + + # Compute and store the viscous fluxes + @trixi_timeit timer() "calculate viscous fluxes" begin + calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh, + equations_parabolic, dg, cache, cache_parabolic) + end + + # The remainder of this function is essentially a regular rhs! for parabolic + # equations (i.e., it computes the divergence of the viscous fluxes) + # + # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have + # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the + # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the + # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it + # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* + # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we + # do not need to recreate the existing data structure only with a different name, and c) we do not + # need to interpolate solutions *and* gradients to the surfaces. 
+ + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) + end + + # TODO: parabolic; extend to mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing end # Transform solution variables prior to taking the gradient # (e.g., conservative to primitive variables). Defaults to doing nothing. # TODO: can we avoid copying data? 
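The transformation mentioned in the comment above is selected per equation type through `gradient_variable_transformation`, which `transform_variables!` below applies node by node. A sketch of the dispatch pattern, assuming Trixi.jl's names; the overload in the second part is hypothetical:

    # Default: gradients are taken in conservative variables (identity map).
    gradient_variable_transformation(::AbstractEquationsParabolic) = cons2cons

    # An equation type can opt into different gradient variables, e.g.
    # gradient_variable_transformation(::MyParabolicEquations) = cons2prim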
-function transform_variables!(u_transformed, u, mesh::TreeMesh{2}, +function transform_variables!(u_transformed, u, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations_parabolic, dg, i, j, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, equations_parabolic) - set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, j, element) + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations_parabolic, dg, i, j, element) + u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, + equations_parabolic) + set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, + i, j, element) + end end - end end # This is the version used when calculating the divergence of the viscous fluxes function calc_volume_integral!(du, flux_viscous, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, dg::DGSEM, cache) - @unpack derivative_dhat = dg.basis - flux_viscous_x, flux_viscous_y = flux_viscous - - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, element) - flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, element) - - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, equations_parabolic, dg, ii, j, element) - end - - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, equations_parabolic, dg, i, jj, element) - end + @unpack derivative_dhat = dg.basis + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, + element) + flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, + equations_parabolic, dg, ii, j, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, + equations_parabolic, dg, i, jj, element) + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache_parabolic, flux_viscous, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack interfaces = cache_parabolic - @unpack orientations = interfaces - - flux_viscous_x, flux_viscous_y = flux_viscous - - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for j in 
eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, j, interface] = flux_viscous_x[v, nnodes(dg), j, left_element] - interfaces.u[2, v, j, interface] = flux_viscous_x[v, 1, j, right_element] - end - else # if orientations[interface] == 2 - # interface in y-direction - for i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, i, interface] = flux_viscous_y[v, i, nnodes(dg), left_element] - interfaces.u[2, v, i, interface] = flux_viscous_y[v, i, 1, right_element] - end + @unpack interfaces = cache_parabolic + @unpack orientations = interfaces + + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for j in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, j, interface] = flux_viscous_x[v, nnodes(dg), j, + left_element] + interfaces.u[2, v, j, interface] = flux_viscous_x[v, 1, j, + right_element] + end + else # if orientations[interface] == 2 + # interface in y-direction + for i in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, i, interface] = flux_viscous_y[v, i, nnodes(dg), + left_element] + interfaces.u[2, v, i, interface] = flux_viscous_y[v, i, 1, + right_element] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{2}, equations_parabolic, dg::DG, cache_parabolic) - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Get precomputed fluxes at interfaces - flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, - dg, i, interface) - - # Compute interface flux as mean of left and right viscous fluxes - # TODO: parabolic; only BR1 at the moment - flux = 0.5 * (flux_ll + flux_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, left_direction, left_id] = flux[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] - end + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # 
Get precomputed fluxes at interfaces + flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, + dg, i, interface) + + # Compute interface flux as mean of left and right viscous fluxes + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (flux_ll + flux_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, left_direction, left_id] = flux[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function prolong2boundaries!(cache_parabolic, flux_viscous, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack boundaries = cache_parabolic - @unpack orientations, neighbor_sides = boundaries - flux_viscous_x, flux_viscous_y = flux_viscous - - @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, l, boundary] = flux_viscous_x[v, nnodes(dg), l, element] + @unpack boundaries = cache_parabolic + @unpack orientations, neighbor_sides = boundaries + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for boundary in eachboundary(dg, cache_parabolic) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, l, boundary] = flux_viscous_x[v, nnodes(dg), l, + element] + end + else # Element in +x direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, l, boundary] = flux_viscous_x[v, 1, l, element] + end + end + else # if orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, l, boundary] = flux_viscous_y[v, l, nnodes(dg), + element] + end + else + # element in +y direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, l, boundary] = flux_viscous_y[v, l, 1, element] + end + end end - else # Element in +x direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, l, boundary] = flux_viscous_x[v, 1, l, element] - end - end - else # if orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! 
`boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, l, boundary] = flux_viscous_y[v, l, nnodes(dg), element] - end - else - # element in +y direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, l, boundary] = flux_viscous_y[v, l, 1, element] - end - end end - end - return nothing + return nothing end - -function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh::TreeMesh{2}, +function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) - gradients_x, gradients_y = gradients - flux_viscous_x, flux_viscous_y = flux_viscous # output arrays - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Get solution and gradients - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, element) - gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, element) - gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, element) - - # Calculate viscous flux and store each component for later use - flux_viscous_node_x = flux(u_node, (gradients_1_node, gradients_2_node), 1, equations_parabolic) - flux_viscous_node_y = flux(u_node, (gradients_1_node, gradients_2_node), 2, equations_parabolic) - set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, i, j, element) - set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, i, j, element) + gradients_x, gradients_y = gradients + flux_viscous_x, flux_viscous_y = flux_viscous # output arrays + + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + # Get solution and gradients + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, + element) + gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, + element) + gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, + element) + + # Calculate viscous flux and store each component for later use + flux_viscous_node_x = flux(u_node, (gradients_1_node, gradients_2_node), 1, + equations_parabolic) + flux_viscous_node_y = flux(u_node, (gradients_1_node, gradients_2_node), 2, + equations_parabolic) + set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, + i, j, element) + set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, + i, j, element) + end end - end end - # TODO: parabolic; decide if we should keep this, and if so, extend to 3D. 
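Note that the interface coupling used throughout this file, both for grad(u) and for the viscous fluxes, is the first method of Bassi and Rebay (BR1): the numerical flux is simply the arithmetic mean of the two one-sided values, matching the "only BR1 at the moment" TODO above. As a one-line sketch (the function name is illustrative):

    # BR1: central coupling of the one-sided interface values
    flux_br1(f_ll, f_rr) = 0.5 * (f_ll + f_rr)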
function get_unsigned_normal_vector_2d(direction) - if direction > 4 || direction < 1 - error("Direction = $direction; in 2D, direction should be 1, 2, 3, or 4.") - end - if direction == 1 || direction == 2 - return SVector(1.0, 0.0) - else - return SVector(0.0, 1.0) - end + if direction > 4 || direction < 1 + error("Direction = $direction; in 2D, direction should be 1, 2, 3, or 4.") + end + if direction == 1 || direction == 2 + return SVector(1.0, 0.0) + else + return SVector(0.0, 1.0) + end end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - return nothing +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - return nothing + return nothing end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, dg, + cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, dg, + cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[3], + 
equations_parabolic, surface_integral, dg, + cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, dg, + cache, + 4, firsts[4], lasts[4]) end -function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{<:Any,4}, t, +function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{ + <:Any, + 4 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - - # TODO: revisit if we want more general boundary treatments. - # This assumes the gradient numerical flux at the boundary is the gradient variable, - # which is consistent with BR1, LDG. - flux_inner = u_inner - - x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) - flux = boundary_condition(flux_inner, u_inner, get_unsigned_normal_vector_2d(direction), - x, t, Gradient(), equations_parabolic) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, direction, neighbor] = flux[v] - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. 
+ flux_inner = u_inner + + x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) + flux = boundary_condition(flux_inner, u_inner, + get_unsigned_normal_vector_2d(direction), + x, t, Gradient(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, + dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, + dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[3], + equations_parabolic, surface_integral, + dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, + dg, cache, + 4, firsts[4], lasts[4]) end -function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{<:Any,4}, t, +function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{ + <:Any, + 4 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - - # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction") - # of the viscous flux, as computed in `prolong2boundaries!` - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in 
first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get viscous boundary fluxes - flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - flux_inner = flux_ll - else # Element is on the right, boundary on the left - flux_inner = flux_rr - end - - x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) - - # TODO: add a field in `cache.boundaries` for gradient information. - # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information. - # This currently works with Dirichlet/Neumann boundary conditions for LaplaceDiffusion2D and - # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27. - # It will not work with implementations which utilize `u_inner` to impose boundary conditions. - flux = boundary_condition(flux_inner, nothing, get_unsigned_normal_vector_2d(direction), - x, t, Divergence(), equations_parabolic) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, direction, neighbor] = flux[v] - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + + # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction") + # of the viscous flux, as computed in `prolong2boundaries!` + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get viscous boundary fluxes + flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, + boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + flux_inner = flux_ll + else # Element is on the right, boundary on the left + flux_inner = flux_rr + end + + x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) + + # TODO: add a field in `cache.boundaries` for gradient information. + # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information. + # This currently works with Dirichlet/Neumann boundary conditions for LaplaceDiffusion2D and + # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27. + # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
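For context on the call that follows: each parabolic boundary condition is invoked twice per right-hand-side evaluation, with the marker type `Gradient()` while computing grad(u) and with `Divergence()` for the viscous fluxes, so a single callable implements both passes by dispatching on that argument. A hedged sketch of such a functor, mirroring the argument order of the call sites in this file; the struct and field names are hypothetical:

    struct MyDirichletBC{F}
        boundary_value_function::F   # (x, t, equations) -> boundary state
    end

    # Gradient pass: impose the prescribed boundary state.
    function (bc::MyDirichletBC)(flux_inner, u_inner, normal, x, t,
                                 operator_type::Gradient, equations_parabolic)
        return bc.boundary_value_function(x, t, equations_parabolic)
    end

    # Divergence pass: keep the interior viscous flux unchanged.
    function (bc::MyDirichletBC)(flux_inner, u_inner, normal, x, t,
                                 operator_type::Divergence, equations_parabolic)
        return flux_inner
    end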
+ flux = boundary_condition(flux_inner, nothing, + get_unsigned_normal_vector_2d(direction), + x, t, Divergence(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end - # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{2}, equations_parabolic, boundary_conditions_parabolic, dg::DG, cache, cache_parabolic) + gradients_x, gradients_y = gradients - gradients_x, gradients_y = gradients - - # Reset du - @trixi_timeit timer() "reset gradients" begin - reset_du!(gradients_x, dg, cache) - reset_du!(gradients_y, dg, cache) - end - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - @unpack derivative_dhat = dg.basis - @threaded for element in eachelement(dg, cache) - - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, element) + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + end - for ii in eachnode(dg) - multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], u_node, equations_parabolic, dg, ii, j, element) + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + @unpack derivative_dhat = dg.basis + @threaded for element in eachelement(dg, cache) + + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], + u_node, equations_parabolic, dg, ii, j, + element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], + u_node, equations_parabolic, dg, i, jj, + element) + end + end end + end - for jj in eachnode(dg) - multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, equations_parabolic, dg, i, jj, element) - end - end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) end - end - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + @unpack surface_flux_values = cache_parabolic.elements + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, dg, i, + interface) + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, left_direction, 
left_id] = flux[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + end + end + end + end - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - @unpack surface_flux_values = cache_parabolic.elements - @unpack neighbor_ids, orientations = cache_parabolic.interfaces + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) + end - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, - equations_parabolic, dg, i, interface) - flux = 0.5 * (u_ll + u_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, left_direction, left_id] = flux[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] - end - end + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradients!(cache_parabolic, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) end - end - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; mortars - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache_parabolic.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). 
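To illustrate the comment above: inside an `@muladd begin ... end` block (MuladdMacro.jl), a right-hand side of the form `a + b * c` is rewritten to `muladd(b, c, a)`, which the compiler can turn into a single fused multiply-add; with an explicit assignment the macro sees the whole expression. A minimal sketch:

    using MuladdMacro

    # The right-hand side a + b * c is rewritten to muladd(b, c, a).
    @muladd combine(a, b, c) = a + b * c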
- factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations_parabolic) - # surface at -x - gradients_x[v, 1, l, element] = ( - gradients_x[v, 1, l, element] - surface_flux_values[v, l, 1, element] * factor_1) - - # surface at +x - gradients_x[v, nnodes(dg), l, element] = ( - gradients_x[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2) - - # surface at -y - gradients_y[v, l, 1, element] = ( - gradients_y[v, l, 1, element] - surface_flux_values[v, l, 3, element] * factor_1) - - # surface at +y - gradients_y[v, l, nnodes(dg), element] = ( - gradients_y[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2) + + # TODO: parabolic; mortars + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache_parabolic.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + # surface at -x + gradients_x[v, 1, l, element] = (gradients_x[v, 1, l, element] - + surface_flux_values[v, l, 1, + element] * + factor_1) + + # surface at +x + gradients_x[v, nnodes(dg), l, element] = (gradients_x[v, nnodes(dg), + l, element] + + surface_flux_values[v, l, + 2, + element] * + factor_2) + + # surface at -y + gradients_y[v, l, 1, element] = (gradients_y[v, l, 1, element] - + surface_flux_values[v, l, 3, + element] * + factor_1) + + # surface at +y + gradients_y[v, l, nnodes(dg), element] = (gradients_y[v, l, + nnodes(dg), + element] + + surface_flux_values[v, l, + 4, + element] * + factor_2) + end + end end - end end - end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin - apply_jacobian!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) - end + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, + cache_parabolic) + apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, + cache_parabolic) + end - return nothing + return nothing end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache_parabolic(mesh::TreeMesh{2}, equations_hyperbolic::AbstractEquations, +function create_cache_parabolic(mesh::TreeMesh{2}, + equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, + uEltype) - n_vars = nvariables(equations_hyperbolic) - n_nodes = nnodes(elements) - n_elements = nelements(elements) - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) - gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) - flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + n_vars = nvariables(equations_hyperbolic) + n_nodes = nnodes(elements) + n_elements = nelements(elements) + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - # cache = (; elements, interfaces, boundaries, mortars) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + # cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) - # Add specialized parts of the cache required to compute the mortars etc. - # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the mortars etc. + # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) - return cache + return cache end - # Needed to *not* flip the sign of the inverse Jacobian. # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. -function apply_jacobian!(du, mesh::TreeMesh{2}, - equations::AbstractEquationsParabolic, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] +function apply_jacobian_parabolic!(du, mesh::Union{TreeMesh{2}, P4estMesh{2}}, + equations::AbstractEquationsParabolic, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = cache.elements.inverse_jacobian[element] - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end end - end - return nothing + return nothing end - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index a7c6a8b4746..8095dae123a 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -3,734 +3,789 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # everything related to a DG semidiscretization in 2D using MPI, # currently limited to Lobatto-Legendre nodes - # TODO: MPI dimension agnostic mutable struct MPICache{uEltype <: Real} - mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} - mpi_send_requests::Vector{MPI.Request} - mpi_recv_requests::Vector{MPI.Request} - n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} - n_elements_global::Int - first_element_global_id::Int + mpi_neighbor_ranks::Vector{Int} + mpi_neighbor_interfaces::Vector{Vector{Int}} + mpi_neighbor_mortars::Vector{Vector{Int}} + mpi_send_buffers::Vector{Vector{uEltype}} + mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_send_requests::Vector{MPI.Request} + mpi_recv_requests::Vector{MPI.Request} + n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} + n_elements_global::Int + first_element_global_id::Int end - function MPICache(uEltype) - # MPI communication "just works" for bitstypes only - if !isbitstype(uEltype) - throw(ArgumentError("MPICache only supports bitstypes, $uEltype is not a bitstype.")) - end - mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_send_requests = Vector{MPI.Request}(undef, 0) - mpi_recv_requests = Vector{MPI.Request}(undef, 0) - n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) - n_elements_global = 0 - first_element_global_id = 0 - - MPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + # MPI communication "just works" for bitstypes only + if !isbitstype(uEltype) + throw(ArgumentError("MPICache only supports bitstypes, $uEltype is not a bitstype.")) + end + mpi_neighbor_ranks = Vector{Int}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_send_requests = Vector{MPI.Request}(undef, 0) + mpi_recv_requests = Vector{MPI.Request}(undef, 0) + n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) + n_elements_global = 0 + first_element_global_id = 0 + + MPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end -@inline Base.eltype(::MPICache{uEltype}) where uEltype = uEltype - +@inline Base.eltype(::MPICache{uEltype}) where {uEltype} = uEltype # TODO: MPI dimension agnostic function start_mpi_receive!(mpi_cache::MPICache) + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_recv_requests[index] = MPI.Irecv!(mpi_cache.mpi_recv_buffers[index], + d, d, mpi_comm()) + end - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_recv_requests[index] = MPI.Irecv!( - mpi_cache.mpi_recv_buffers[index], d, d, mpi_comm()) - end - - return nothing + return nothing end - # TODO: MPI dimension agnostic function start_mpi_send!(mpi_cache::MPICache, mesh, equations, dg, 
cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - for d in 1:length(mpi_cache.mpi_neighbor_ranks) - send_buffer = mpi_cache.mpi_send_buffers[d] + for d in 1:length(mpi_cache.mpi_neighbor_ranks) + send_buffer = mpi_cache.mpi_send_buffers[d] - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - - if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[2, :, :, interface]) - else # local element in negative direction - @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[1, :, :, interface]) - end - end + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size - # Each mortar has a total size of 4 * data_size, set everything to NaN first and overwrite the - # parts where local data exists - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * 4 * data_size - send_buffer[interfaces_data_size+1:interfaces_data_size+mortars_data_size] .= NaN - - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - # First and last indices in the send buffer for mortar data obtained from local element - # in a given position - index_base = interfaces_data_size + (index - 1) * 4 * data_size - indices = ( - # first, last for local element in position 1 (lower element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (upper element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # firsts, lasts for local element in position 3 (large element) - (index_base + 2 * data_size + 1, - index_base + 3 * data_size, - index_base + 3 * data_size + 1, - index_base + 4 * data_size), - ) - - for position in cache.mpi_mortars.local_neighbor_positions[mortar] - # Determine whether the data belongs to the left or right side - if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side - if position in (1, 2) # small element - leftright = 2 - else # large element - leftright = 1 - end - else # large element on right side - if position in (1, 2) # small element - leftright = 1 - else # large element - leftright = 2 - end + if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[2, :, :, + interface]) + else # local element in negative direction + @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[1, :, :, + interface]) + end end - # copy data to buffer - if position == 1 # lower element - first, last = indices[position] - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) - elseif position == 2 # upper element - first, last = indices[position] - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) - else # large element - first_lower, last_lower, first_upper, last_upper = indices[position] - @views send_buffer[first_lower:last_lower] .= vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) - @views send_buffer[first_upper:last_upper] .= vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) + + # Each mortar has a total size of 4 * data_size, set everything to 
NaN first and overwrite the + # parts where local data exists + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * 4 * data_size + send_buffer[(interfaces_data_size + 1):(interfaces_data_size + mortars_data_size)] .= NaN + + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + # First and last indices in the send buffer for mortar data obtained from local element + # in a given position + index_base = interfaces_data_size + (index - 1) * 4 * data_size + indices = ( + # first, last for local element in position 1 (lower element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (upper element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # firsts, lasts for local element in position 3 (large element) + (index_base + 2 * data_size + 1, + index_base + 3 * data_size, + index_base + 3 * data_size + 1, + index_base + 4 * data_size)) + + for position in cache.mpi_mortars.local_neighbor_positions[mortar] + # Determine whether the data belongs to the left or right side + if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side + if position in (1, 2) # small element + leftright = 2 + else # large element + leftright = 1 + end + else # large element on right side + if position in (1, 2) # small element + leftright = 1 + else # large element + leftright = 2 + end + end + # copy data to buffer + if position == 1 # lower element + first, last = indices[position] + @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_lower[leftright, + :, + :, + mortar]) + elseif position == 2 # upper element + first, last = indices[position] + @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_upper[leftright, + :, + :, + mortar]) + else # large element + first_lower, last_lower, first_upper, last_upper = indices[position] + @views send_buffer[first_lower:last_lower] .= vec(cache.mpi_mortars.u_lower[leftright, + :, + :, + mortar]) + @views send_buffer[first_upper:last_upper] .= vec(cache.mpi_mortars.u_upper[leftright, + :, + :, + mortar]) + end + end end - end end - end - # Start sending - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_send_requests[index] = MPI.Isend( - mpi_cache.mpi_send_buffers[index], d, mpi_rank(), mpi_comm()) - end + # Start sending + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_send_requests[index] = MPI.Isend(mpi_cache.mpi_send_buffers[index], + d, mpi_rank(), mpi_comm()) + end - return nothing + return nothing end - # TODO: MPI dimension agnostic function finish_mpi_send!(mpi_cache::MPICache) - MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) + MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) end - # TODO: MPI dimension agnostic function finish_mpi_receive!(mpi_cache::MPICache, mesh, equations, dg, cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - - # Start receiving and unpack received data until all communication is finished - d = MPI.Waitany(mpi_cache.mpi_recv_requests) - while d !== nothing - recv_buffer = mpi_cache.mpi_recv_buffers[d] - - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - - if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - @views vec(cache.mpi_interfaces.u[1, :, :, interface]) .= recv_buffer[first:last] - else # local element 
in negative direction - @views vec(cache.mpi_interfaces.u[2, :, :, interface]) .= recv_buffer[first:last] - end - end + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - # First and last indices in the receive buffer for mortar data obtained from remote element - # in a given position - index_base = interfaces_data_size + (index - 1) * 4 * data_size - indices = ( - # first, last for local element in position 1 (lower element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (upper element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # firsts, lasts for local element in position 3 (large element) - (index_base + 2 * data_size + 1, - index_base + 3 * data_size, - index_base + 3 * data_size + 1, - index_base + 4 * data_size), - ) - - for position in 1:3 - # Skip if received data for `pos` is NaN as no real data has been sent for the - # corresponding element - if isnan(recv_buffer[Base.first(indices[position])]) - continue - end + # Start receiving and unpack received data until all communication is finished + d = MPI.Waitany(mpi_cache.mpi_recv_requests) + while d !== nothing + recv_buffer = mpi_cache.mpi_recv_buffers[d] + + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size - # Determine whether the received data belongs to the left or right side - if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side - if position in (1, 2) # small element - leftright = 2 - else # large element - leftright = 1 - end - else # large element on right side - if position in (1, 2) # small element - leftright = 1 - else # large element - leftright = 2 - end + if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + @views vec(cache.mpi_interfaces.u[1, :, :, interface]) .= recv_buffer[first:last] + else # local element in negative direction + @views vec(cache.mpi_interfaces.u[2, :, :, interface]) .= recv_buffer[first:last] + end end - if position == 1 # lower element data has been received - first, last = indices[position] - @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first:last] - elseif position == 2 # upper element data has been received - first, last = indices[position] - @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first:last] - else # large element data has been received - first_lower, last_lower, first_upper, last_upper = indices[position] - @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first_lower:last_lower] - @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first_upper:last_upper] + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + # First and last indices in the receive buffer for mortar data obtained from remote element + # in a given position + index_base = interfaces_data_size + (index - 1) * 4 * data_size + indices = ( + # first, last for local element in position 1 (lower element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (upper element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # firsts, lasts 
for local element in position 3 (large element)
+                 (index_base + 2 * data_size + 1,
+                  index_base + 3 * data_size,
+                  index_base + 3 * data_size + 1,
+                  index_base + 4 * data_size))
+
+            for position in 1:3
+                # Skip if received data for `position` is NaN as no real data has
+                # been sent for the corresponding element
+                if isnan(recv_buffer[Base.first(indices[position])])
+                    continue
+                end
+
+                # Determine whether the received data belongs to the left or right side
+                if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side
+                    if position in (1, 2) # small element
+                        leftright = 2
+                    else # large element
+                        leftright = 1
+                    end
+                else # large element on right side
+                    if position in (1, 2) # small element
+                        leftright = 1
+                    else # large element
+                        leftright = 2
+                    end
+                end
+
+                if position == 1 # lower element data has been received
+                    first, last = indices[position]
+                    @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first:last]
+                elseif position == 2 # upper element data has been received
+                    first, last = indices[position]
+                    @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first:last]
+                else # large element data has been received
+                    first_lower, last_lower, first_upper, last_upper = indices[position]
+                    @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first_lower:last_lower]
+                    @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first_upper:last_upper]
+                end
+            end
         end
-      end
-    end
-    d = MPI.Waitany(mpi_cache.mpi_recv_requests)
-  end
+        d = MPI.Waitany(mpi_cache.mpi_recv_requests)
+    end
-  return nothing
+    return nothing
 end
-
 # This method is called when a SemidiscretizationHyperbolic is constructed.
 # It constructs the basic `cache` used throughout the simulation to compute
 # the RHS etc.
 function create_cache(mesh::ParallelTreeMesh{2}, equations,
-                      dg::DG, RealT, ::Type{uEltype}) where {uEltype<:Real}
-  # Get cells for which an element needs to be created (i.e. all leaf cells)
-  leaf_cell_ids = local_leaf_cells(mesh.tree)
+                      dg::DG, RealT, ::Type{uEltype}) where {uEltype <: Real}
+    # Get cells for which an element needs to be created (i.e. all leaf cells)
+    leaf_cell_ids = local_leaf_cells(mesh.tree)

-  elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype)
+    elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype)

-  interfaces = init_interfaces(leaf_cell_ids, mesh, elements)
+    interfaces = init_interfaces(leaf_cell_ids, mesh, elements)

-  mpi_interfaces = init_mpi_interfaces(leaf_cell_ids, mesh, elements)
+    mpi_interfaces = init_mpi_interfaces(leaf_cell_ids, mesh, elements)

-  boundaries = init_boundaries(leaf_cell_ids, mesh, elements)
+    boundaries = init_boundaries(leaf_cell_ids, mesh, elements)

-  mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar)
+    mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar)

-  mpi_mortars = init_mpi_mortars(leaf_cell_ids, mesh, elements, dg.mortar)
+    mpi_mortars = init_mpi_mortars(leaf_cell_ids, mesh, elements, dg.mortar)

-  mpi_cache = init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars,
-                             nvariables(equations), nnodes(dg), uEltype)
+    mpi_cache = init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars,
+                               nvariables(equations), nnodes(dg), uEltype)

-  cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars,
+    cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars,
            mpi_cache)

-  # Add specialized parts of the cache required to compute the volume integral etc.
-  cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...)
-  cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...)
+    # Add specialized parts of the cache required to compute the volume integral etc.
+    cache = (; cache...,
+             create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...)
+    cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...)
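To make the buffer arithmetic in `finish_mpi_receive!` above easier to follow: each rank-to-rank receive buffer stores all interface blocks first, followed by four equally sized blocks per mortar (lower element, upper element, and two consecutive blocks for the large element), and positions for which the remote rank sent no data are filled with `NaN`. A minimal, self-contained sketch of that layout; the helper names are hypothetical and not part of Trixi:

# Offsets into a flat receive buffer: `n_interfaces` interface blocks of
# `data_size` values each, then 4 blocks per mortar. Returns (first, last)
# index pairs for the lower, upper, and large-element data of one mortar.
function mortar_buffer_indices(n_interfaces, mortar_index, data_size)
    index_base = n_interfaces * data_size + (mortar_index - 1) * 4 * data_size
    lower = (index_base + 1, index_base + data_size)
    upper = (index_base + data_size + 1, index_base + 2 * data_size)
    large = (index_base + 2 * data_size + 1, index_base + 4 * data_size)  # two blocks
    return (; lower, upper, large)
end

# Senders fill unused blocks with NaN, so the receiver can skip them by
# inspecting a single entry:
block_has_data(recv_buffer, first_index) = !isnan(recv_buffer[first_index])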
- return cache + return cache end +function init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, + uEltype) + mpi_cache = MPICache(uEltype) -function init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - mpi_cache = MPICache(uEltype) + init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, + nnodes, uEltype) + return mpi_cache +end - init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - return mpi_cache +function init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, + nnodes, uEltype) + mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(elements, + mpi_interfaces, + mpi_mortars, + mesh) + + mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, + mpi_neighbor_mortars, + ndims(mesh), + nvars, + nnodes, + uEltype) + + # Determine local and total number of elements + n_elements_by_rank = Vector{Int}(undef, mpi_nranks()) + n_elements_by_rank[mpi_rank() + 1] = nelements(elements) + MPI.Allgather!(MPI.UBuffer(n_elements_by_rank, 1), mpi_comm()) + n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) + n_elements_global = MPI.Allreduce(nelements(elements), +, mpi_comm()) + @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + + # Determine the global element id of the first element + first_element_global_id = MPI.Exscan(nelements(elements), +, mpi_comm()) + if mpi_isroot() + # With Exscan, the result on the first rank is undefined + first_element_global_id = 1 + else + # On all other ranks we need to add one, since Julia has one-based indices + first_element_global_id += 1 + end + # TODO reuse existing structures + @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id end +# Initialize connectivity between MPI neighbor ranks +function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, + mesh::TreeMesh2D) + tree = mesh.tree + + # Determine neighbor ranks and sides for MPI interfaces + neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) + # The global interface id is the smaller of the (globally unique) neighbor cell ids, multiplied by + # number of directions (2 * ndims) plus direction minus one + global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + for interface_id in 1:nmpiinterfaces(mpi_interfaces) + orientation = mpi_interfaces.orientations[interface_id] + remote_side = mpi_interfaces.remote_sides[interface_id] + # Direction is from local cell to remote cell + if orientation == 1 # MPI interface in x-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 1 + else # remote cell on the "right" of MPI interface + direction = 2 + end + else # MPI interface in y-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 3 + else # remote cell on the "right" of MPI interface + direction = 4 + end + end + local_neighbor_id = mpi_interfaces.local_neighbor_ids[interface_id] + local_cell_id = elements.cell_ids[local_neighbor_id] + remote_cell_id = tree.neighbor_ids[direction, local_cell_id] + neighbor_ranks_interface[interface_id] = tree.mpi_ranks[remote_cell_id] + if local_cell_id < remote_cell_id + global_interface_ids[interface_id] = 2 * ndims(tree) * local_cell_id + + direction - 1 + else + global_interface_ids[interface_id] = (2 * ndims(tree) * remote_cell_id + + opposite_direction(direction) - 1) + end + end -function init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = - init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, mesh) - - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = - init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, ndims(mesh), nvars, nnodes, uEltype) - - # Determine local and total number of elements - n_elements_by_rank = Vector{Int}(undef, mpi_nranks()) - n_elements_by_rank[mpi_rank() + 1] = nelements(elements) - MPI.Allgather!(MPI.UBuffer(n_elements_by_rank, 1), mpi_comm()) - n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) - n_elements_global = MPI.Allreduce(nelements(elements), +, mpi_comm()) - @assert n_elements_global == sum(n_elements_by_rank) "error in total number of elements" - - # Determine the global element id of the first element - first_element_global_id = MPI.Exscan(nelements(elements), +, mpi_comm()) - if mpi_isroot() - # With Exscan, the result on the first rank is undefined - first_element_global_id = 1 - else - # On all other ranks we need to add one, since Julia has one-based indices - first_element_global_id += 1 - end - # TODO reuse existing structures - @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id -end + # Determine neighbor ranks for MPI mortars + neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) + # The global mortar id is the (globally unique) large cell id, multiplied by + # number of directions (2 * ndims) plus direction minus one where + # direction = 1 for mortars in x-direction where large element is left + # direction = 2 for mortars in x-direction where large element is right + # direction = 3 for mortars in y-direction where large element is left + # direction = 4 for mortars in y-direction where large element is right + global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) + for mortar in 1:nmpimortars(mpi_mortars) + neighbor_ranks_mortar[mortar] = Vector{Int}() + + orientation = mpi_mortars.orientations[mortar] + large_side = mpi_mortars.large_sides[mortar] + direction = (orientation - 1) * 2 + large_side + + local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] + if 3 in local_neighbor_positions # large element is on this rank + large_element_id = local_neighbor_ids[findfirst(pos -> pos == 3, + local_neighbor_positions)] + large_cell_id = elements.cell_ids[large_element_id] + else # large element is remote + cell_id = elements.cell_ids[first(local_neighbor_ids)] + large_cell_id = tree.neighbor_ids[direction, tree.parent_ids[cell_id]] + end + + neighbor_cell_id = tree.neighbor_ids[opposite_direction(direction), + large_cell_id] + if direction == 1 + lower_cell_id = tree.child_ids[1, neighbor_cell_id] + upper_cell_id = tree.child_ids[3, neighbor_cell_id] + elseif direction == 2 + lower_cell_id = tree.child_ids[2, neighbor_cell_id] + upper_cell_id = tree.child_ids[4, neighbor_cell_id] + elseif direction == 3 + lower_cell_id = tree.child_ids[1, neighbor_cell_id] + upper_cell_id = tree.child_ids[2, neighbor_cell_id] + else + lower_cell_id = tree.child_ids[3, neighbor_cell_id] + upper_cell_id = tree.child_ids[4, neighbor_cell_id] + end + for cell_id in (lower_cell_id, upper_cell_id, large_cell_id) + if !is_own_cell(tree, cell_id) + neighbor_rank = tree.mpi_ranks[cell_id] + if !(neighbor_rank in neighbor_ranks_mortar[mortar]) + push!(neighbor_ranks_mortar[mortar], neighbor_rank) + end + end + end -# Initialize connectivity between MPI neighbor ranks -function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, mesh::TreeMesh2D) - tree = mesh.tree - - # Determine neighbor ranks and sides for MPI interfaces - neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) - # The global interface id is the smaller of the (globally unique) neighbor cell ids, multiplied by - # number of directions (2 * ndims) plus direction minus one - global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) - for interface_id in 1:nmpiinterfaces(mpi_interfaces) - orientation = mpi_interfaces.orientations[interface_id] - remote_side = mpi_interfaces.remote_sides[interface_id] - # Direction is from local cell to remote cell - if orientation == 1 # MPI interface in x-direction - if remote_side == 1 # remote cell on the "left" of MPI interface - direction = 1 - else # remote cell on the "right" of MPI interface - direction = 2 - end - else # MPI interface in y-direction - if remote_side == 1 # remote cell on the "left" of MPI interface - 
direction = 3 - else # remote cell on the "right" of MPI interface - direction = 4 - end + global_mortar_ids[mortar] = 2 * ndims(tree) * large_cell_id + direction - 1 end - local_neighbor_id = mpi_interfaces.local_neighbor_ids[interface_id] - local_cell_id = elements.cell_ids[local_neighbor_id] - remote_cell_id = tree.neighbor_ids[direction, local_cell_id] - neighbor_ranks_interface[interface_id] = tree.mpi_ranks[remote_cell_id] - if local_cell_id < remote_cell_id - global_interface_ids[interface_id] = 2 * ndims(tree) * local_cell_id + direction - 1 - else - global_interface_ids[interface_id] = (2 * ndims(tree) * remote_cell_id + - opposite_direction(direction) - 1) + + # Get sorted, unique neighbor ranks + mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |> + sort |> unique + + # Sort interfaces by global interface id + p = sortperm(global_interface_ids) + neighbor_ranks_interface .= neighbor_ranks_interface[p] + interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + + # Sort mortars by global mortar id + p = sortperm(global_mortar_ids) + neighbor_ranks_mortar .= neighbor_ranks_mortar[p] + mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] + + # For each neighbor rank, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + for (index, d) in enumerate(mpi_neighbor_ranks) + mpi_neighbor_interfaces[index] = interface_ids[findall(x -> (x == d), + neighbor_ranks_interface)] + mpi_neighbor_mortars[index] = mortar_ids[findall(x -> (d in x), + neighbor_ranks_mortar)] end - end - - # Determine neighbor ranks for MPI mortars - neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) - # The global mortar id is the (globally unique) large cell id, multiplied by - # number of directions (2 * ndims) plus direction minus one where - # direction = 1 for mortars in x-direction where large element is left - # direction = 2 for mortars in x-direction where large element is right - # direction = 3 for mortars in y-direction where large element is left - # direction = 4 for mortars in y-direction where large element is right - global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) - for mortar in 1:nmpimortars(mpi_mortars) - neighbor_ranks_mortar[mortar] = Vector{Int}() - - orientation = mpi_mortars.orientations[mortar] - large_side = mpi_mortars.large_sides[mortar] - direction = (orientation - 1) * 2 + large_side - - local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] - if 3 in local_neighbor_positions # large element is on this rank - large_element_id = local_neighbor_ids[findfirst(pos -> pos == 3, local_neighbor_positions)] - large_cell_id = elements.cell_ids[large_element_id] - else # large element is remote - cell_id = elements.cell_ids[first(local_neighbor_ids)] - large_cell_id = tree.neighbor_ids[direction, tree.parent_ids[cell_id]] + + # Sanity checks that we counted all interfaces exactly once + @assert sum(length(v) for v in mpi_neighbor_interfaces) == + nmpiinterfaces(mpi_interfaces) + + return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars +end + +function rhs!(du, u, t, + mesh::Union{ParallelTreeMesh{2}, ParallelP4estMesh{2}}, equations, + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} + # Start to receive MPI data + @trixi_timeit timer() "start MPI 
receive" start_mpi_receive!(cache.mpi_cache) + + # Prolong solution to MPI interfaces + @trixi_timeit timer() "prolong2mpiinterfaces" begin + prolong2mpiinterfaces!(cache, u, mesh, equations, dg.surface_integral, dg) end - neighbor_cell_id = tree.neighbor_ids[opposite_direction(direction), large_cell_id] - if direction == 1 - lower_cell_id = tree.child_ids[1, neighbor_cell_id] - upper_cell_id = tree.child_ids[3, neighbor_cell_id] - elseif direction == 2 - lower_cell_id = tree.child_ids[2, neighbor_cell_id] - upper_cell_id = tree.child_ids[4, neighbor_cell_id] - elseif direction == 3 - lower_cell_id = tree.child_ids[1, neighbor_cell_id] - upper_cell_id = tree.child_ids[2, neighbor_cell_id] - else - lower_cell_id = tree.child_ids[3, neighbor_cell_id] - upper_cell_id = tree.child_ids[4, neighbor_cell_id] + # Prolong solution to MPI mortars + @trixi_timeit timer() "prolong2mpimortars" begin + prolong2mpimortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) end - for cell_id in (lower_cell_id, upper_cell_id, large_cell_id) - if !is_own_cell(tree, cell_id) - neighbor_rank = tree.mpi_ranks[cell_id] - if !(neighbor_rank in neighbor_ranks_mortar[mortar]) - push!(neighbor_ranks_mortar[mortar], neighbor_rank) - end - end + # Start to send MPI data + @trixi_timeit timer() "start MPI send" begin + start_mpi_send!(cache.mpi_cache, mesh, equations, dg, cache) end - global_mortar_ids[mortar] = 2 * ndims(tree) * large_cell_id + direction - 1 - end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - # Get sorted, unique neighbor ranks - mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |> sort |> unique + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - # Sort interfaces by global interface id - p = sortperm(global_interface_ids) - neighbor_ranks_interface .= neighbor_ranks_interface[p] - interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + # Prolong solution to interfaces + # TODO: Taal decide order of arguments, consistent vs. modified cache first? 
+ @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end - # Sort mortars by global mortar id - p = sortperm(global_mortar_ids) - neighbor_ranks_mortar .= neighbor_ranks_mortar[p] - mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end - # For each neighbor rank, init connectivity data structures - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - for (index, d) in enumerate(mpi_neighbor_ranks) - mpi_neighbor_interfaces[index] = interface_ids[findall(x->(x == d), neighbor_ranks_interface)] - mpi_neighbor_mortars[index] = mortar_ids[findall(x->(d in x), neighbor_ranks_mortar)] - end + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end - # Sanity checks that we counted all interfaces exactly once - @assert sum(length(v) for v in mpi_neighbor_interfaces) == nmpiinterfaces(mpi_interfaces) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end - return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars -end + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + # Finish to receive MPI data + @trixi_timeit timer() "finish MPI receive" begin + finish_mpi_receive!(cache.mpi_cache, mesh, equations, dg, cache) + end -function rhs!(du, u, t, - mesh::Union{ParallelTreeMesh{2}, ParallelP4estMesh{2}}, equations, - initial_condition, boundary_conditions, source_terms::Source, - dg::DG, cache) where {Source} - # Start to receive MPI data - @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) - - # Prolong solution to MPI interfaces - @trixi_timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to MPI mortars - @trixi_timeit timer() "prolong2mpimortars" prolong2mpimortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Start to send MPI data - @trixi_timeit timer() "start MPI send" start_mpi_send!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - # TODO: Taal decide order of arguments, consistent vs. modified cache first? 
- @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Finish to receive MPI data - @trixi_timeit timer() "finish MPI receive" finish_mpi_receive!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Calculate MPI interface fluxes - @trixi_timeit timer() "MPI interface flux" calc_mpi_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Calculate MPI mortar fluxes - @trixi_timeit timer() "MPI mortar flux" calc_mpi_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - # Finish to send MPI data - @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) - - return nothing -end + # Calculate MPI interface fluxes + @trixi_timeit timer() "MPI interface flux" begin + calc_mpi_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + # Calculate MPI mortar fluxes + @trixi_timeit timer() "MPI mortar flux" begin + calc_mpi_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end -function prolong2mpiinterfaces!(cache, u, - mesh::ParallelTreeMesh{2}, - equations, surface_integral, dg::DG) - @unpack mpi_interfaces = cache + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - @threaded for interface in eachmpiinterface(dg, cache) - local_element = mpi_interfaces.local_neighbor_ids[interface] + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) - if mpi_interfaces.orientations[interface] == 1 # interface in x-direction - if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - for j in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[2, v, j, 
interface] = u[v, 1, j, local_element] - end - else # local element in negative direction - for j in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, local_element] - end - end - else # interface in y-direction - if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - for i in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[2, v, i, interface] = u[v, i, 1, local_element] - end - else # local element in negative direction - for i in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), local_element] - end - end + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) end - end - return nothing -end + # Finish to send MPI data + @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) + return nothing +end -function prolong2mpimortars!(cache, u, - mesh::ParallelTreeMesh{2}, equations, - mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack mpi_mortars = cache +function prolong2mpiinterfaces!(cache, u, + mesh::ParallelTreeMesh{2}, + equations, surface_integral, dg::DG) + @unpack mpi_interfaces = cache - @threaded for mortar in eachmpimortar(dg, cache) - local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] + @threaded for interface in eachmpiinterface(dg, cache) + local_element = mpi_interfaces.local_neighbor_ids[interface] - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position in (1, 2) # Current element is small - # Copy solution small to small - if mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[2, v, l, mortar] = u[v, 1, l, element] - end - end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[2, v, l, mortar] = u[v, 1, l, element] - end - end - end - else - # L2 mortars in y-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[2, v, l, mortar] = u[v, l, 1, element] + if mpi_interfaces.orientations[interface] == 1 # interface in x-direction + if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + for j in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[2, v, j, interface] = u[v, 1, j, local_element] end - end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[2, v, l, mortar] = u[v, l, 1, element] + else # local element in negative direction + for j in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, + local_element] end - end end - end - else # large_sides[mortar] == 2 -> small elements on left side - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[1, v, l, mortar] = u[v, nnodes(dg), l, element] + else # interface in y-direction + if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + for i in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[2, v, i, interface] = u[v, i, 1, local_element] end - 
end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[1, v, l, mortar] = u[v, nnodes(dg), l, element] + else # local element in negative direction + for i in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), + local_element] end - end end - else - # L2 mortars in y-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[1, v, l, mortar] = u[v, l, nnodes(dg), element] + end + end + + return nothing +end + +function prolong2mpimortars!(cache, u, + mesh::ParallelTreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, surface_integral, + dg::DGSEM) + @unpack mpi_mortars = cache + + @threaded for mortar in eachmpimortar(dg, cache) + local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] + + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position in (1, 2) # Current element is small + # Copy solution small to small + if mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[2, v, l, mortar] = u[v, 1, l, + element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[2, v, l, mortar] = u[v, 1, l, + element] + end + end + end + else + # L2 mortars in y-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[2, v, l, mortar] = u[v, l, 1, + element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[2, v, l, mortar] = u[v, l, 1, + element] + end + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[1, v, l, mortar] = u[v, + nnodes(dg), + l, element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[1, v, l, mortar] = u[v, + nnodes(dg), + l, element] + end + end + end + else + # L2 mortars in y-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[1, v, l, mortar] = u[v, l, + nnodes(dg), + element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[1, v, l, mortar] = u[v, l, + nnodes(dg), + element] + end + end + end + end end - end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[1, v, l, mortar] = u[v, l, nnodes(dg), element] + else # position == 3 -> current element is large + # Interpolate large element face data to small interface locations + if mpi_mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, nnodes(dg), :, element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, nnodes(dg), element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + end + else # 
large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, 1, element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + end end - end end - end - end - else # position == 3 -> current element is large - # Interpolate large element face data to small interface locations - if mpi_mortars.large_sides[mortar] == 1 # -> large element on left side - leftright = 1 - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, nnodes(dg), :, element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, nnodes(dg), element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - end - else # large_sides[mortar] == 2 -> large element on right side - leftright = 2 - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, 1, :, element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, 1, element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - end end - end end - end - return nothing + return nothing end - function calc_mpi_interface_flux!(surface_flux_values, mesh::ParallelTreeMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, local_neighbor_ids, orientations, remote_sides = cache.mpi_interfaces - - @threaded for interface in eachmpiinterface(dg, cache) - # Get local neighboring element - element = local_neighbor_ids[interface] - - # Determine interface direction with respect to element: - if orientations[interface] == 1 # interface in x-direction - if remote_sides[interface] == 1 # local element in positive direction - direction = 1 - else # local element in negative direction - direction = 2 - end - else # interface in y-direction - if remote_sides[interface] == 1 # local element in positive direction - direction = 3 - else # local element in negative direction - direction = 4 - end - end + @unpack surface_flux = surface_integral + @unpack u, local_neighbor_ids, orientations, remote_sides = cache.mpi_interfaces + + @threaded for interface in eachmpiinterface(dg, cache) + # Get local neighboring element + element = local_neighbor_ids[interface] + + # Determine interface direction with respect to element: + if orientations[interface] == 1 # interface in x-direction + if remote_sides[interface] == 1 # local element in positive direction + direction = 1 + else # local element in negative direction + direction = 2 + end + else # interface in y-direction + if remote_sides[interface] == 1 # local element in positive direction + direction = 3 + else # local element in negative direction + direction = 4 + end + end - for i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + for i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], 
equations) - # Copy flux to local element storage - for v in eachvariable(equations) - surface_flux_values[v, i, direction, element] = flux[v] - end + # Copy flux to local element storage + for v in eachvariable(equations) + surface_flux_values[v, i, direction, element] = flux[v] + end + end end - end - return nothing + return nothing end - function calc_mpi_mortar_flux!(surface_flux_values, mesh::ParallelTreeMesh{2}, nonconservative_terms::False, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u_lower, u_upper, orientations = cache.mpi_mortars - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - - @threaded for mortar in eachmpimortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper = fstar_upper_threaded[Threads.threadid()] - fstar_lower = fstar_lower_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, orientation) - calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, orientation) - - mpi_mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar_upper, fstar_lower) - end - - return nothing + @unpack surface_flux = surface_integral + @unpack u_lower, u_upper, orientations = cache.mpi_mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmpimortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, + orientation) + + mpi_mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end + + return nothing end @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values, @@ -738,64 +793,61 @@ end mortar_l2::LobattoLegendreMortarL2, dg::DGSEM, cache, mortar, fstar_upper, fstar_lower) - local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position in (1, 2) # Current element is small - # Copy flux small to small - if cache.mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 - end - end - - if position == 1 - surface_flux_values[:, :, direction, element] .= fstar_lower - elseif position == 2 - surface_flux_values[:, :, direction, element] .= fstar_upper - end - else # position == 3 -> current element is large - # Project small fluxes to large element - if cache.mpi_mortars.large_sides[mortar] == 1 # -> large element on left side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 - end - else # 
large_sides[mortar] == 2 -> large element on right side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 - end - end - - multiply_dimensionwise!( - view(surface_flux_values, :, :, direction, element), mortar_l2.reverse_upper, fstar_upper, - mortar_l2.reverse_lower, fstar_lower) - end - end + local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - return nothing -end + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position in (1, 2) # Current element is small + # Copy flux small to small + if cache.mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + end + if position == 1 + surface_flux_values[:, :, direction, element] .= fstar_lower + elseif position == 2 + surface_flux_values[:, :, direction, element] .= fstar_upper + end + else # position == 3 -> current element is large + # Project small fluxes to large element + if cache.mpi_mortars.large_sides[mortar] == 1 # -> large element on left side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + else # large_sides[mortar] == 2 -> large element on right side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + end + multiply_dimensionwise!(view(surface_flux_values, :, :, direction, element), + mortar_l2.reverse_upper, fstar_upper, + mortar_l2.reverse_lower, fstar_lower) + end + end -end # @muladd \ No newline at end of file + return nothing +end +end # @muladd diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index aef86e3de7d..95abb2595e5 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # everything related to a DG semidiscretization in 3D, # currently limited to Lobatto-Legendre nodes @@ -13,357 +13,403 @@ # the RHS etc. function create_cache(mesh::TreeMesh{3}, equations, dg::DG, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, mortars) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end - # The methods below are specialized on the volume integral type # and called from the basic `create_cache` method at the top. function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, - equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype) - NamedTuple() + equations, volume_integral::VolumeIntegralFluxDifferencing, + dg::DG, uEltype) + NamedTuple() end - -function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, +function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] - - cache = create_cache(mesh, equations, - VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), - dg, uEltype) - - A4dp1_x = Array{uEltype, 4} - A4dp1_y = Array{uEltype, 4} - A4dp1_z = Array{uEltype, 4} - fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - - return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, fstar1_R_threaded, + element_ids_dg = Int[] + element_ids_dgfv = Int[] + + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype) + + A4dp1_x = Array{uEltype, 4} + A4dp1_y = Array{uEltype, 4} + A4dp1_z = Array{uEltype, 4} + fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = 
A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + + return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, + fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded) end - -function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, uEltype) - - A4dp1_x = Array{uEltype, 4} - A4dp1_y = Array{uEltype, 4} - A4dp1_z = Array{uEltype, 4} - fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - - return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, +function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + equations, + volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, + uEltype) + A4dp1_x = Array{uEltype, 4} + A4dp1_y = Array{uEltype, 4} + A4dp1_z = Array{uEltype, 4} + fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + + return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, + fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded) end - # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. 
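All of the `create_cache` methods above use the same pattern for the `fstar*_threaded` scratch arrays: one preallocated buffer per thread, picked via `Threads.threadid()` inside the hot loops, so the kernels neither allocate nor race. A reduced sketch of the pattern with made-up sizes; it assumes loop iterations do not migrate between threads, which the `:static` schedule guarantees here:

# One scratch array per thread, allocated once up front
fstar_threaded = [zeros(4, 5, 5) for _ in 1:Threads.nthreads()]

Threads.@threads :static for element in 1:1000
    fstar = fstar_threaded[Threads.threadid()]  # this thread's private buffer
    fill!(fstar, 0.0)
    # ... compute per-element fluxes into fstar without allocating ...
end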
-function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal compare performance of different types - A3d = Array{uEltype, 3} - fstar_upper_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_upper_right_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_lower_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_lower_right_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_tmp1_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - - (; fstar_upper_left_threaded, fstar_upper_right_threaded, +function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + equations, mortar_l2::LobattoLegendreMortarL2, uEltype) + # TODO: Taal compare performance of different types + A3d = Array{uEltype, 3} + fstar_upper_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_upper_right_threaded = A3d[A3d(undef, nvariables(equations), + nnodes(mortar_l2), nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_lower_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_lower_right_threaded = A3d[A3d(undef, nvariables(equations), + nnodes(mortar_l2), nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_tmp1_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + + (; fstar_upper_left_threaded, fstar_upper_right_threaded, fstar_lower_left_threaded, fstar_lower_right_threaded, fstar_tmp1_threaded) end - # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
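Regarding the TODO above about passing a custom timer: the `@trixi_timeit timer() "label" ...` blocks used throughout `rhs!` follow the pattern of TimerOutputs.jl. A minimal sketch of that underlying pattern, not of Trixi's macro itself (`to`, `step!`, and the labels are illustrative):

using TimerOutputs

const to = TimerOutput()

function step!(du)
    @timeit to "reset ∂u/∂t" fill!(du, 0.0)
    @timeit to "volume integral" begin
        # ... kernel work measured as one named section ...
    end
    return nothing
end

step!(zeros(10))
print_timer(to)  # hierarchical report of where the time went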
function rhs!(du, u, t, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, 
u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3} + }, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + weak_form_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + dg, cache) + end - @threaded for element in eachelement(dg, cache) - weak_form_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - dg, cache) - end - - return nothing + return nothing end @inline function weak_form_kernel!(du, u, element, mesh::TreeMesh{3}, nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_dhat = dg.basis + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_dhat = dg.basis - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) - flux1 = flux(u_node, 1, equations) - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, equations, dg, ii, j, k, element) - end + flux1 = flux(u_node, 1, equations) + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, + equations, dg, ii, j, k, element) + end - flux2 = flux(u_node, 2, equations) - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, equations, dg, i, jj, k, element) - end + flux2 = flux(u_node, 2, equations) + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, + equations, dg, i, jj, k, element) + end - flux3 = flux(u_node, 3, equations) - for kk in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], flux3, equations, dg, i, j, kk, element) + flux3 = flux(u_node, 3, equations) + for kk in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], flux3, + equations, dg, i, j, kk, element) + end end - end - return nothing + return nothing end - function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3} + }, nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end + @threaded for element in eachelement(dg, cache) + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_integral.volume_flux, dg, cache) + end end @inline function flux_differencing_kernel!(du, u, element, 
mesh::TreeMesh{3}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - - # Calculate volume integral in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. - - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - flux1 = volume_flux(u_node, u_node_ii, 1, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, equations, dg, ii, j, k, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - flux2 = volume_flux(u_node, u_node_jj, 2, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, equations, dg, i, jj, k, element) - end + # Calculate volume integral in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
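The comment above is the key performance trick of this kernel: because the two-point `volume_flux` is symmetric, each node pair `(i, ii)` with `ii > i` needs only one flux evaluation, which is then accumulated into both nodes via the two transposed entries of `derivative_split`. A minimal, self-contained 1D sketch of the pattern (toy names and values, not the Trixi API):

```julia
n = 4
# Toy split-derivative matrix with zero diagonal, the property exploited above.
D_split = [i == j ? 0.0 : 1.0 / (i - j) for i in 1:n, j in 1:n]
u = [1.0, 2.0, 3.0, 4.0]
du = zeros(n)
f_sym(a, b) = 0.25 * (a^2 + b^2)  # a symmetric two-point flux (Burgers-like)

for i in 1:n, ii in (i + 1):n
    f = f_sym(u[i], u[ii])         # evaluated once per pair ...
    du[i] += D_split[i, ii] * f    # ... contributes to node i
    du[ii] += D_split[ii, i] * f   # ... and to node ii
end
```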
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + flux1 = volume_flux(u_node, u_node_ii, 1, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, + equations, dg, ii, j, k, element) + end + + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + flux2 = volume_flux(u_node, u_node_jj, 2, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, + equations, dg, i, jj, k, element) + end - # z direction - for kk in (k+1):nnodes(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - flux3 = volume_flux(u_node, u_node_kk, 3, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], flux3, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], flux3, equations, dg, i, j, kk, element) + # z direction + for kk in (k + 1):nnodes(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + flux3 = volume_flux(u_node, u_node_kk, 3, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], flux3, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], flux3, + equations, dg, i, j, kk, element) + end end - end end @inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{3}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * noncons_flux1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. 
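The two comments above rely on `true` being a strong multiplicative identity in Julia, so the default `alpha = true` adds no floating point work after constant propagation. A short standalone check of this identity (plain Julia semantics, nothing Trixi-specific):

```julia
x = 1.2345
@assert true * x === x    # bit-identical result: the default scaling is free
@assert 0.5 * x == x / 2  # a genuine blending factor scales as usual
```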
+ @unpack derivative_split = dg.basis + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * noncons_flux2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # z direction - for kk in eachnode(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - noncons_flux3 = nonconservative_flux(u_node, u_node_kk, 3, equations) - integral_contribution = integral_contribution + derivative_split[k, kk] * noncons_flux3 - end + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. + + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * noncons_flux1 + end - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, k, element) - end -end + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * noncons_flux2 + end + # z direction + for kk in eachnode(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + noncons_flux3 = nonconservative_flux(u_node, u_node_kk, 3, equations) + integral_contribution = integral_contribution + + derivative_split[k, kk] * noncons_flux3 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, k, element) + end +end # TODO: Taal dimension agnostic function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3} + }, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache - @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end + @unpack element_ids_dg, element_ids_dgfv = cache + @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral + + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = 
@trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) + + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) + + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache) + end - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] + # Loop over blended DG-FV elements + @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) - end + # Calculate FV volume integral contribution + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end - return nothing + return nothing end # TODO: Taal dimension agnostic @@ -372,575 +418,650 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache) - @unpack volume_flux_fv = volume_integral + @unpack volume_flux_fv = volume_integral - # Calculate LGL FV volume integral - @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) - end + # Calculate LGL FV volume integral + @threaded for element in eachelement(dg, cache) + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, true) + end - return nothing + return nothing end - @inline function fv_kernel!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms, equations, - volume_flux_fv, dg::DGSEM, cache, element, alpha=true) - @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache - @unpack inverse_weights = dg.basis - - # Calculate FV two-point fluxes - fstar1_L = fstar1_L_threaded[Threads.threadid()] - fstar2_L = fstar2_L_threaded[Threads.threadid()] - fstar3_L = fstar3_L_threaded[Threads.threadid()] - fstar1_R = fstar1_R_threaded[Threads.threadid()] - fstar2_R = fstar2_R_threaded[Threads.threadid()] - fstar3_R = fstar3_R_threaded[Threads.threadid()] - - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh, nonconservative_terms, equations, volume_flux_fv, dg, element, cache) - - # Calculate FV volume integral contribution - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, k, element] += ( alpha * - (inverse_weights[i] * (fstar1_L[v, i+1, j, k] - fstar1_R[v, i, j, k]) + - inverse_weights[j] * (fstar2_L[v, i, j+1, k] - 
fstar2_R[v, i, j, k]) + - inverse_weights[k] * (fstar3_L[v, i, j, k+1] - fstar3_R[v, i, j, k])) ) + volume_flux_fv, dg::DGSEM, cache, element, alpha = true) + @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache + @unpack inverse_weights = dg.basis + + # Calculate FV two-point fluxes + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar2_L = fstar2_L_threaded[Threads.threadid()] + fstar3_L = fstar3_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + fstar2_R = fstar2_R_threaded[Threads.threadid()] + fstar3_R = fstar3_R_threaded[Threads.threadid()] + + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, + mesh, nonconservative_terms, equations, volume_flux_fv, dg, element, + cache) + # Calculate FV volume integral contribution + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, k, element] += (alpha * + (inverse_weights[i] * + (fstar1_L[v, i + 1, j, k] - + fstar1_R[v, i, j, k]) + + inverse_weights[j] * + (fstar2_L[v, i, j + 1, k] - + fstar2_R[v, i, j, k]) + + inverse_weights[k] * + (fstar3_L[v, i, j, k + 1] - + fstar3_R[v, i, j, k]))) + end end - end - return nothing + return nothing end - # Calculate the finite volume fluxes inside the elements (**without non-conservative terms**). -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, nonconservative_terms::False, equations, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, + mesh::TreeMesh{3}, nonconservative_terms::False, + equations, volume_flux_fv, dg::DGSEM, element, cache) + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) + + for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction + set_node_vars!(fstar1_L, flux, equations, dg, i, j, k) + set_node_vars!(fstar1_R, flux, equations, dg, i, j, k) + end - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) - - for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction - set_node_vars!(fstar1_L, flux, equations, dg, i, j, k) - set_node_vars!(fstar1_R, flux, equations, dg, i, j, k) - end - - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) - - for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction - set_node_vars!(fstar2_L, flux, equations, dg, i, j, k) - set_node_vars!(fstar2_R, flux, equations, dg, 
                                        i, j, k)
-  end
-
-  fstar3_L[:, :, :, 1           ] .= zero(eltype(fstar3_L))
-  fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L))
-  fstar3_R[:, :, :, 1           ] .= zero(eltype(fstar3_R))
-  fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R))
-
-  for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
-    u_ll = get_node_vars(u, equations, dg, i, j, k-1, element)
-    u_rr = get_node_vars(u, equations, dg, i, j, k,   element)
-    flux = volume_flux_fv(u_ll, u_rr, 3, equations) # orientation 3: z direction
-    set_node_vars!(fstar3_L, flux, equations, dg, i, j, k)
-    set_node_vars!(fstar3_R, flux, equations, dg, i, j, k)
-  end
-
-  return nothing
-end
+    fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L))
+    fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L))
+    fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R))
+    fstar2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_R))
+
+    for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)
+        u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element)
+        u_rr = get_node_vars(u, equations, dg, i, j, k, element)
+        flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction
+        set_node_vars!(fstar2_L, flux, equations, dg, i, j, k)
+        set_node_vars!(fstar2_R, flux, equations, dg, i, j, k)
+    end
+    fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L))
+    fstar3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_L))
+    fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R))
+    fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R))
+
+    for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
+        u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element)
+        u_rr = get_node_vars(u, equations, dg, i, j, k, element)
+        flux = volume_flux_fv(u_ll, u_rr, 3, equations) # orientation 3: z direction
+        set_node_vars!(fstar3_L, flux, equations, dg, i, j, k)
+        set_node_vars!(fstar3_R, flux, equations, dg, i, j, k)
+    end
+
+    return nothing
+end
# Calculate the finite volume fluxes inside the elements (**with non-conservative terms**).
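In the nonconservative variant that follows, the subcell interface flux is no longer single-valued: both sides share the conservative part, but each adds half of a nonconservative term whose argument order is swapped. A minimal standalone sketch of this left/right pairing (toy scalar functions with made-up names `f_cons` and `phi_noncons`, not the Trixi API):

```julia
# Hypothetical scalar stand-ins for `volume_flux` and `nonconservative_flux`.
f_cons(a, b) = 0.5 * (a + b)       # symmetric in its arguments
phi_noncons(a, b) = a * (b - a)    # NOT symmetric: argument order matters

u_ll, u_rr = 1.0, 3.0
flux = f_cons(u_ll, u_rr)
# The factor 0.5 reflects the SBP/SAT interpretation cited in the comments below.
flux_L = flux + 0.5 * phi_noncons(u_ll, u_rr)  # stored in the fstar*_L arrays
flux_R = flux + 0.5 * phi_noncons(u_rr, u_ll)  # stored in the fstar*_R arrays
@assert flux_L != flux_R  # left and right subcells see different flux values
```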
-@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, mesh::TreeMesh{3}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - volume_flux, nonconservative_flux = volume_flux_fv - - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) - - for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - - # Compute conservative part - flux = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) - flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) - - set_node_vars!(fstar1_L, flux_L, equations, dg, i, j, k) - set_node_vars!(fstar1_R, flux_R, equations, dg, i, j, k) - end - - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) - - for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - - # Compute conservative part - flux = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) - flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) - - set_node_vars!(fstar2_L, flux_L, equations, dg, i, j, k) - set_node_vars!(fstar2_R, flux_R, equations, dg, i, j, k) - end - - fstar3_L[:, :, :, 1 ] .= zero(eltype(fstar3_L)) - fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L)) - fstar3_R[:, :, :, 1 ] .= zero(eltype(fstar3_R)) - fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R)) - - for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j, k-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - - # Compute conservative part - flux = volume_flux(u_ll, u_rr, 3, equations) # orientation 3: z direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 3, equations) - flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 3, equations) - - set_node_vars!(fstar3_L, flux_L, equations, dg, i, j, k) - set_node_vars!(fstar3_R, flux_R, equations, dg, i, j, k) - end - - return nothing -end + volume_flux, nonconservative_flux = volume_flux_fv + + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + 
fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) + + for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + # Compute conservative part + flux = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) + flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) + + set_node_vars!(fstar1_L, flux_L, equations, dg, i, j, k) + set_node_vars!(fstar1_R, flux_R, equations, dg, i, j, k) + end + + fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_R)) + + for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + # Compute conservative part + flux = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) + flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) + + set_node_vars!(fstar2_L, flux_L, equations, dg, i, j, k) + set_node_vars!(fstar2_R, flux_R, equations, dg, i, j, k) + end + + fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L)) + fstar3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_L)) + fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R)) + fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R)) + for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + # Compute conservative part + flux = volume_flux(u_ll, u_rr, 3, equations) # orientation 3: z direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 3, equations) + flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 3, equations) + + set_node_vars!(fstar3_L, flux_L, equations, dg, i, j, k) + set_node_vars!(fstar3_R, flux_R, equations, dg, i, j, k) + end + + return nothing +end # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - @unpack orientations = interfaces - - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, j, k, interface] = u[v, nnodes(dg), j, k, left_element] - interfaces.u[2, v, j, k, interface] = u[v, 1, j, k, right_element] - end - elseif orientations[interface] == 
2 - # interface in y-direction - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, k, interface] = u[v, i, nnodes(dg), k, left_element] - interfaces.u[2, v, i, k, interface] = u[v, i, 1, k, right_element] - end - else # if orientations[interface] == 3 - # interface in z-direction - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, i, j, nnodes(dg), left_element] - interfaces.u[2, v, i, j, interface] = u[v, i, j, 1, right_element] - end + @unpack interfaces = cache + @unpack orientations = interfaces + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, j, k, interface] = u[v, nnodes(dg), j, k, + left_element] + interfaces.u[2, v, j, k, interface] = u[v, 1, j, k, right_element] + end + elseif orientations[interface] == 2 + # interface in y-direction + for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, k, interface] = u[v, i, nnodes(dg), k, + left_element] + interfaces.u[2, v, i, k, interface] = u[v, i, 1, k, right_element] + end + else # if orientations[interface] == 3 + # interface in z-direction + for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, j, interface] = u[v, i, j, nnodes(dg), + left_element] + interfaces.u[2, v, i, j, interface] = u[v, i, j, 1, right_element] + end + end end - end - return nothing + return nothing end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{3}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] - end + for j in eachnode(dg), i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, 
interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + end + end end - end end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{3}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, orientations = cache.interfaces - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise Riemann solver - orientation = orientations[interface] - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 - # Compute both nonconservative fluxes - noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) - noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + 0.5 * noncons_right[v] - end + for j in eachnode(dg), i in eachnode(dg) + # Call pointwise Riemann solver + orientation = orientations[interface] + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) + + # Compute both nonconservative fluxes + noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) + noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + + 0.5 * + noncons_right[v] + end + end end - end - return nothing + return nothing end - function prolong2boundaries!(cache, u, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) 
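    # Copy the face values of the element adjacent to each boundary into the
    # boundary container: `neighbor_sides` selects slot 1 when the element lies
    # in the negative coordinate direction of the boundary face and slot 2 when
    # it lies in the positive direction.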
- @unpack boundaries = cache - @unpack orientations, neighbor_sides = boundaries - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, j, k, boundary] = u[v, nnodes(dg), j, k, element] - end - else # Element in +x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, j, k, boundary] = u[v, 1, j, k, element] + @unpack boundaries = cache + @unpack orientations, neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, j, k, boundary] = u[v, nnodes(dg), j, k, element] + end + else # Element in +x direction of boundary + for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, j, k, boundary] = u[v, 1, j, k, element] + end + end + elseif orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, i, k, boundary] = u[v, i, nnodes(dg), k, element] + end + else + # element in +y direction of boundary + for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, i, k, boundary] = u[v, i, 1, k, element] + end + end + else #if orientations[boundary] == 3 + # boundary in z-direction + if neighbor_sides[boundary] == 1 + # element in -z direction of boundary + for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, i, j, boundary] = u[v, i, j, nnodes(dg), element] + end + else + # element in +z direction of boundary + for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, i, j, boundary] = u[v, i, j, 1, element] + end + end end - end - elseif orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, i, k, boundary] = u[v, i, nnodes(dg), k, element] - end - else - # element in +y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, i, k, boundary] = u[v, i, 1, k, element] - end - end - else #if orientations[boundary] == 3 - # boundary in z-direction - if neighbor_sides[boundary] == 1 - # element in -z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, i, j, boundary] = u[v, i, j, nnodes(dg), element] - end - else - # element in +z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, i, j, boundary] = u[v, i, j, 1, element] - end - end end - end - return nothing + return nothing end # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert 
isempty(eachboundary(dg, cache)) end function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], - equations, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], - equations, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], - equations, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], - equations, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[5], - equations, surface_integral, dg, cache, - 5, firsts[5], lasts[5]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[6], - equations, surface_integral, dg, cache, - 6, firsts[6], lasts[6]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], + equations, surface_integral, dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], + equations, surface_integral, dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], + equations, surface_integral, dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], + equations, surface_integral, dg, cache, + 4, firsts[4], lasts[4]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[5], + equations, surface_integral, dg, cache, + 5, firsts[5], lasts[5]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[6], + equations, surface_integral, dg, cache, + 6, firsts[6], lasts[6]) end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,5}, t, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 5}, + t, boundary_condition, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] - for j in eachnode(dg), i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - 
u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, i, j, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, j, direction, neighbor] = flux[v] - end + for j in eachnode(dg), i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, i, j, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, j, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end - function prolong2mortars!(cache, u, mesh::TreeMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - # temporary buffer for projections - @unpack fstar_tmp1_threaded = cache - - @threaded for mortar in eachmortar(dg, cache) - fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] - - lower_left_element = cache.mortars.neighbor_ids[1, mortar] - lower_right_element = cache.mortars.neighbor_ids[2, mortar] - upper_left_element = cache.mortars.neighbor_ids[3, mortar] - upper_right_element = cache.mortars.neighbor_ids[4, mortar] - large_element = cache.mortars.neighbor_ids[5, mortar] - - # Copy solution small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for k in eachnode(dg), j in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[2, v, j, k, mortar] = u[v, 1, j, k, upper_left_element] - cache.mortars.u_upper_right[2, v, j, k, mortar] = u[v, 1, j, k, upper_right_element] - cache.mortars.u_lower_left[2, v, j, k, mortar] = u[v, 1, j, k, lower_left_element] - cache.mortars.u_lower_right[2, v, j, k, mortar] = u[v, 1, j, k, lower_right_element] - end + # temporary buffer for projections + @unpack fstar_tmp1_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + lower_left_element = cache.mortars.neighbor_ids[1, mortar] + lower_right_element = cache.mortars.neighbor_ids[2, mortar] + upper_left_element = cache.mortars.neighbor_ids[3, mortar] + upper_right_element = cache.mortars.neighbor_ids[4, mortar] + large_element = cache.mortars.neighbor_ids[5, mortar] + + # Copy solution small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for k in eachnode(dg), j in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[2, v, j, k, mortar] = u[v, 1, j, k, + upper_left_element] + cache.mortars.u_upper_right[2, v, j, k, mortar] = u[v, 1, j, k, + upper_right_element] + cache.mortars.u_lower_left[2, v, j, k, mortar] = u[v, 1, j, k, + lower_left_element] + cache.mortars.u_lower_right[2, v, j, k, mortar] = u[v, 1, j, k, + lower_right_element] + end + end + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + for k 
in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[2, v, i, k, mortar] = u[v, i, 1, k, + upper_left_element] + cache.mortars.u_upper_right[2, v, i, k, mortar] = u[v, i, 1, k, + upper_right_element] + cache.mortars.u_lower_left[2, v, i, k, mortar] = u[v, i, 1, k, + lower_left_element] + cache.mortars.u_lower_right[2, v, i, k, mortar] = u[v, i, 1, k, + lower_right_element] + end + end + else # orientations[mortar] == 3 + # L2 mortars in z-direction + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[2, v, i, j, mortar] = u[v, i, j, 1, + upper_left_element] + cache.mortars.u_upper_right[2, v, i, j, mortar] = u[v, i, j, 1, + upper_right_element] + cache.mortars.u_lower_left[2, v, i, j, mortar] = u[v, i, j, 1, + lower_left_element] + cache.mortars.u_lower_right[2, v, i, j, mortar] = u[v, i, j, 1, + lower_right_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for k in eachnode(dg), j in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + upper_left_element] + cache.mortars.u_upper_right[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + upper_right_element] + cache.mortars.u_lower_left[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + lower_left_element] + cache.mortars.u_lower_right[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + lower_right_element] + end + end + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + for k in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + upper_left_element] + cache.mortars.u_upper_right[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + upper_right_element] + cache.mortars.u_lower_left[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + lower_left_element] + cache.mortars.u_lower_right[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + lower_right_element] + end + end + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + upper_left_element] + cache.mortars.u_upper_right[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + upper_right_element] + cache.mortars.u_lower_left[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + lower_left_element] + cache.mortars.u_lower_right[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + lower_right_element] + end + end + end end - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - for k in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[2, v, i, k, mortar] = u[v, i, 1, k, upper_left_element] - cache.mortars.u_upper_right[2, v, i, k, mortar] = u[v, i, 1, k, upper_right_element] - cache.mortars.u_lower_left[2, v, i, k, mortar] = u[v, i, 1, k, lower_left_element] - cache.mortars.u_lower_right[2, v, i, k, mortar] = u[v, i, 1, k, lower_right_element] - end - end - else # orientations[mortar] == 3 - # L2 mortars in z-direction - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[2, v, i, j, mortar] = u[v, i, j, 1, upper_left_element] - cache.mortars.u_upper_right[2, v, i, j, mortar] = u[v, i, j, 1, 
upper_right_element] - cache.mortars.u_lower_left[2, v, i, j, mortar] = u[v, i, j, 1, lower_left_element] - cache.mortars.u_lower_right[2, v, i, j, mortar] = u[v, i, j, 1, lower_right_element] - end - end - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for k in eachnode(dg), j in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, upper_left_element] - cache.mortars.u_upper_right[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, upper_right_element] - cache.mortars.u_lower_left[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, lower_left_element] - cache.mortars.u_lower_right[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, lower_right_element] - end - end - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - for k in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, upper_left_element] - cache.mortars.u_upper_right[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, upper_right_element] - cache.mortars.u_lower_left[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, lower_left_element] - cache.mortars.u_lower_right[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, lower_right_element] - end - end - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), upper_left_element] - cache.mortars.u_upper_right[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), upper_right_element] - cache.mortars.u_lower_left[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), lower_left_element] - cache.mortars.u_lower_right[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), lower_right_element] - end - end - end - end - # Interpolate large element face data to small interface locations - if cache.mortars.large_sides[mortar] == 1 # -> large element on left side - leftright = 1 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, nnodes(dg), :, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - u_large = view(u, :, :, nnodes(dg), :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - else # cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - u_large = view(u, :, :, :, nnodes(dg), large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - end - else # large_sides[mortar] == 2 -> large element on right side - leftright = 2 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, 1, :, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - u_large = view(u, :, :, 1, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - else # cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - u_large = view(u, :, :, :, 1, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - end + # 
Interpolate large element face data to small interface locations + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, nnodes(dg), :, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + u_large = view(u, :, :, nnodes(dg), :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + else # cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + u_large = view(u, :, :, :, nnodes(dg), large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + u_large = view(u, :, :, 1, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + else # cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + u_large = view(u, :, :, :, 1, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + end + end end - end - return nothing + return nothing end -@inline function element_solutions_to_mortars!(mortars, mortar_l2::LobattoLegendreMortarL2, leftright, mortar, - u_large::AbstractArray{<:Any,3}, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_upper_left, leftright, :, :, :, mortar), mortar_l2.forward_lower, mortar_l2.forward_upper, u_large, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_upper_right, leftright, :, :, :, mortar), mortar_l2.forward_upper, mortar_l2.forward_upper, u_large, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_lower_left, leftright, :, :, :, mortar), mortar_l2.forward_lower, mortar_l2.forward_lower, u_large, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_lower_right, leftright, :, :, :, mortar), mortar_l2.forward_upper, mortar_l2.forward_lower, u_large, fstar_tmp1) - return nothing +@inline function element_solutions_to_mortars!(mortars, + mortar_l2::LobattoLegendreMortarL2, + leftright, mortar, + u_large::AbstractArray{<:Any, 3}, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_upper_left, leftright, :, :, :, mortar), + mortar_l2.forward_lower, mortar_l2.forward_upper, u_large, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_upper_right, leftright, :, :, :, mortar), + mortar_l2.forward_upper, mortar_l2.forward_upper, u_large, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_lower_left, leftright, :, :, :, mortar), + mortar_l2.forward_lower, mortar_l2.forward_lower, u_large, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_lower_right, leftright, :, :, :, mortar), + mortar_l2.forward_upper, mortar_l2.forward_lower, u_large, + fstar_tmp1) + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{3}, nonconservative_terms::False, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u_lower_left, u_lower_right, 
u_upper_left, u_upper_right, orientations = cache.mortars - @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, - fstar_lower_left_threaded, fstar_lower_right_threaded, - fstar_tmp1_threaded) = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] - fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] - fstar_lower_left = fstar_lower_left_threaded[Threads.threadid()] - fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] - fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, orientation) - calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, mortar, orientation) - calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, orientation) - calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, mortar, orientation) - - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, mortar, - fstar_upper_left, fstar_upper_right, - fstar_lower_left, fstar_lower_right, - fstar_tmp1) - end - - return nothing + @unpack surface_flux = surface_integral + @unpack u_lower_left, u_lower_right, u_upper_left, u_upper_right, orientations = cache.mortars + @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, + fstar_lower_left_threaded, fstar_lower_right_threaded, + fstar_tmp1_threaded) = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] + fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] + fstar_lower_left = fstar_lower_left_threaded[Threads.threadid()] + fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, + orientation) + calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, + mortar, orientation) + calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, + orientation) + calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, + mortar, orientation) + + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, mortar, + fstar_upper_left, fstar_upper_right, + fstar_lower_left, fstar_lower_right, + fstar_tmp1) + end + + return nothing end function calc_mortar_flux!(surface_flux_values, @@ -948,96 +1069,143 @@ function calc_mortar_flux!(surface_flux_values, nonconservative_terms::True, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u_lower_left, u_lower_right, u_upper_left, u_upper_right, orientations, large_sides = cache.mortars - @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, - fstar_lower_left_threaded, fstar_lower_right_threaded, - fstar_tmp1_threaded) = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] - fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] - fstar_lower_left = 
fstar_lower_left_threaded[Threads.threadid()] - fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] - fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, orientation) - calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, mortar, orientation) - calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, orientation) - calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, mortar, orientation) - - # Add nonconservative fluxes. - # These need to be adapted on the geometry (left/right) since the order of - # the arguments matters, based on the global SBP operator interpretation. - # The same interpretation (global SBP operators coupled discontinuously via - # central fluxes/SATs) explains why we need the factor 0.5. - # Alternatively, you can also follow the argumentation of Bohm et al. 2018 - # ("nonconservative diamond flux") - if large_sides[mortar] == 1 # -> small elements on right side - for j in eachnode(dg), i in eachnode(dg) - # Pull the left and right solutions - u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, equations, dg, i, j, mortar) - u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, equations, dg, i, j, mortar) - u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, equations, dg, i, j, mortar) - u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, equations, dg, i, j, mortar) - # Call pointwise nonconservative term - noncons_upper_left = nonconservative_flux(u_upper_left_ll, u_upper_left_rr, orientation, equations) - noncons_upper_right = nonconservative_flux(u_upper_right_ll, u_upper_right_rr, orientation, equations) - noncons_lower_left = nonconservative_flux(u_lower_left_ll, u_lower_left_rr, orientation, equations) - noncons_lower_right = nonconservative_flux(u_lower_right_ll, u_lower_right_rr, orientation, equations) - # Add to primary and secondary temporary storage - multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, equations, dg, i, j) - end - else # large_sides[mortar] == 2 -> small elements on the left - for j in eachnode(dg), i in eachnode(dg) - # Pull the left and right solutions - u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, equations, dg, i, j, mortar) - u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, equations, dg, i, j, mortar) - u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, equations, dg, i, j, mortar) - u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, equations, dg, i, j, mortar) - # Call pointwise nonconservative term - noncons_upper_left = nonconservative_flux(u_upper_left_rr, u_upper_left_ll, orientation, equations) - noncons_upper_right = nonconservative_flux(u_upper_right_rr, u_upper_right_ll, orientation, equations) - noncons_lower_left = nonconservative_flux(u_lower_left_rr, u_lower_left_ll, orientation, equations) - noncons_lower_right = nonconservative_flux(u_lower_right_rr, u_lower_right_ll, orientation, equations) - # Add to primary and secondary 
temporary storage - multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, equations, dg, i, j) - end - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u_lower_left, u_lower_right, u_upper_left, u_upper_right, orientations, large_sides = cache.mortars + @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, + fstar_lower_left_threaded, fstar_lower_right_threaded, + fstar_tmp1_threaded) = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] + fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] + fstar_lower_left = fstar_lower_left_threaded[Threads.threadid()] + fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, + orientation) + calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, + mortar, orientation) + calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, + orientation) + calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, + mortar, orientation) + + # Add nonconservative fluxes. + # These need to be adapted on the geometry (left/right) since the order of + # the arguments matters, based on the global SBP operator interpretation. + # The same interpretation (global SBP operators coupled discontinuously via + # central fluxes/SATs) explains why we need the factor 0.5. + # Alternatively, you can also follow the argumentation of Bohm et al. 
2018 + # ("nonconservative diamond flux") + if large_sides[mortar] == 1 # -> small elements on right side + for j in eachnode(dg), i in eachnode(dg) + # Pull the left and right solutions + u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, + equations, dg, + i, j, mortar) + u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, + equations, + dg, i, j, + mortar) + u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, + equations, dg, + i, j, mortar) + u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, + equations, + dg, i, j, + mortar) + # Call pointwise nonconservative term + noncons_upper_left = nonconservative_flux(u_upper_left_ll, + u_upper_left_rr, orientation, + equations) + noncons_upper_right = nonconservative_flux(u_upper_right_ll, + u_upper_right_rr, + orientation, equations) + noncons_lower_left = nonconservative_flux(u_lower_left_ll, + u_lower_left_rr, orientation, + equations) + noncons_lower_right = nonconservative_flux(u_lower_right_ll, + u_lower_right_rr, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, + equations, dg, i, j) + end + else # large_sides[mortar] == 2 -> small elements on the left + for j in eachnode(dg), i in eachnode(dg) + # Pull the left and right solutions + u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, + equations, dg, + i, j, mortar) + u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, + equations, + dg, i, j, + mortar) + u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, + equations, dg, + i, j, mortar) + u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, + equations, + dg, i, j, + mortar) + # Call pointwise nonconservative term + noncons_upper_left = nonconservative_flux(u_upper_left_rr, + u_upper_left_ll, orientation, + equations) + noncons_upper_right = nonconservative_flux(u_upper_right_rr, + u_upper_right_ll, + orientation, equations) + noncons_lower_left = nonconservative_flux(u_lower_left_rr, + u_lower_left_ll, orientation, + equations) + noncons_lower_right = nonconservative_flux(u_lower_right_rr, + u_lower_right_ll, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, + equations, dg, i, j) + end + end - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, mortar, - fstar_upper_left, fstar_upper_right, - fstar_lower_left, fstar_lower_right, - fstar_tmp1) - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, mortar, + fstar_upper_left, fstar_upper_right, + fstar_lower_left, fstar_lower_right, + fstar_tmp1) + end - return nothing + return nothing end -@inline function calc_fstar!(destination::AbstractArray{<:Any,3}, equations, +@inline 
function calc_fstar!(destination::AbstractArray{<:Any, 3}, equations, surface_flux, dg::DGSEM, u_interfaces, interface, orientation) + for j in eachnode(dg), i in eachnode(dg) + # Call pointwise two-point numerical flux function + u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, j, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise two-point numerical flux function - u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, j, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Copy flux to left and right element storage - set_node_vars!(destination, flux, equations, dg, i, j) - end + # Copy flux to left and right element storage + set_node_vars!(destination, flux, equations, dg, i, j) + end - return nothing + return nothing end @inline function mortar_fluxes_to_elements!(surface_flux_values, @@ -1048,164 +1216,171 @@ end fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, fstar_tmp1) - lower_left_element = cache.mortars.neighbor_ids[1, mortar] - lower_right_element = cache.mortars.neighbor_ids[2, mortar] - upper_left_element = cache.mortars.neighbor_ids[3, mortar] - upper_right_element = cache.mortars.neighbor_ids[4, mortar] - large_element = cache.mortars.neighbor_ids[5, mortar] - - # Copy flux small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 3 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 5 - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 4 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 6 - end - end - surface_flux_values[:, :, :, direction, upper_left_element] .= fstar_upper_left - surface_flux_values[:, :, :, direction, upper_right_element] .= fstar_upper_right - surface_flux_values[:, :, :, direction, lower_left_element] .= fstar_lower_left - surface_flux_values[:, :, :, direction, lower_right_element] .= fstar_lower_right - - # Project small fluxes to large element - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 4 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 6 + lower_left_element = cache.mortars.neighbor_ids[1, mortar] + lower_right_element = cache.mortars.neighbor_ids[2, mortar] + upper_left_element = cache.mortars.neighbor_ids[3, mortar] + upper_right_element = cache.mortars.neighbor_ids[4, mortar] + large_element = cache.mortars.neighbor_ids[5, mortar] + + # Copy flux small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 3 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 5 + end + 
else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 4 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 6 + end end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 3 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 5 + surface_flux_values[:, :, :, direction, upper_left_element] .= fstar_upper_left + surface_flux_values[:, :, :, direction, upper_right_element] .= fstar_upper_right + surface_flux_values[:, :, :, direction, lower_left_element] .= fstar_lower_left + surface_flux_values[:, :, :, direction, lower_right_element] .= fstar_lower_right + + # Project small fluxes to large element + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 4 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 6 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 3 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 5 + end end - end - - multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_lower, mortar_l2.reverse_upper, fstar_upper_left, fstar_tmp1) - add_multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_upper, mortar_l2.reverse_upper, fstar_upper_right, fstar_tmp1) - add_multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_lower, mortar_l2.reverse_lower, fstar_lower_left, fstar_tmp1) - add_multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_upper, mortar_l2.reverse_lower, fstar_lower_right, fstar_tmp1) - - return nothing -end + multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_lower, mortar_l2.reverse_upper, + fstar_upper_left, fstar_tmp1) + add_multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_upper, mortar_l2.reverse_upper, + fstar_upper_right, fstar_tmp1) + add_multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_lower, mortar_l2.reverse_lower, + fstar_lower_left, fstar_tmp1) + add_multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_upper, mortar_l2.reverse_lower, + fstar_lower_right, fstar_tmp1) + + return nothing +end function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}}, equations, surface_integral, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Access the factors only once before beginning 
the loop to increase performance. - # We also use explicit assignments instead of `+=` and `-=` to let `@muladd` - # turn these into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = ( - du[v, 1, l, m, element] - surface_flux_values[v, l, m, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, m, element] = ( - du[v, nnodes(dg), l, m, element] + surface_flux_values[v, l, m, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, m, element] = ( - du[v, l, 1, m, element] - surface_flux_values[v, l, m, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), m, element] = ( - du[v, l, nnodes(dg), m, element] + surface_flux_values[v, l, m, 4, element] * factor_2) - - # surface at -z - du[v, l, m, 1, element] = ( - du[v, l, m, 1, element] - surface_flux_values[v, l, m, 5, element] * factor_1) - - # surface at +z - du[v, l, m, nnodes(dg), element] = ( - du[v, l, m, nnodes(dg), element] + surface_flux_values[v, l, m, 6, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` and `-=` to let `@muladd` + # turn these into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] - + surface_flux_values[v, l, m, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] - + surface_flux_values[v, l, m, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] - + surface_flux_values[v, l, m, 5, element] * + factor_1) + + # surface at +z + du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + + surface_flux_values[v, l, m, 6, + element] * + factor_2) + end + end end - end - return nothing + return nothing end - function apply_jacobian!(du, mesh::TreeMesh{3}, equations, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = -cache.elements.inverse_jacobian[element] - @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] - - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquations{3}, dg::DG, cache) - return nothing + return nothing end function calc_sources!(du, u, t, source_terms, 
equations::AbstractEquations{3}, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, k, element) - du_local = source_terms(u_local, x_local, t, equations) - add_to_node_vars!(du, du_local, equations, dg, i, j, k, element) + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + j, k, element) + du_local = source_terms(u_local, x_local, t, equations) + add_to_node_vars!(du, du_local, equations, dg, i, j, k, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl index 4fe7a5477de..ec3647ed649 100644 --- a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl @@ -13,7 +13,6 @@ # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @@ -23,244 +22,245 @@ equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). 
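# NOTE (editorial sketch, not part of the patch): the permuted layout the
# comment above describes can be illustrated in plain Julia. Arrays are
# column-major, so linearizing `(j, k)` into `jk = j + nnodes * (k - 1)` and
# storing `[jk, i, v]` makes loops over `jk` run through contiguous memory.
# All names and sizes below are hypothetical stand-ins.
nnodes, nvars = 4, 5
u_prim = rand(nnodes, nnodes, nnodes, nvars)                 # [i, j, k, v]
u_prim_permuted = similar(u_prim, nnodes^2, nnodes, nvars)   # [jk, i, v]
for v in 1:nvars, k in 1:nnodes, j in 1:nnodes, i in 1:nnodes
    jk = j + nnodes * (k - 1)
    u_prim_permuted[jk, i, v] = u_prim[i, j, k, v]
end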
- du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. 
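# NOTE (editorial sketch, not part of the patch): the conserved-to-primitive
# map evaluated pointwise in the `@turbo` loop below, as a standalone
# function. `cons2prim_euler3d` is a hypothetical name; `gamma` is the heat
# capacity ratio.
function cons2prim_euler3d(u, gamma)
    rho, rho_v1, rho_v2, rho_v3, rho_e = u
    v1 = rho_v1 / rho
    v2 = rho_v2 / rho
    v3 = rho_v3 / rho
    p = (gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3))
    return (rho, v1, v2, v3, p)
end
# e.g. cons2prim_euler3d((1.0, 0.1, -0.2, 0.3, 2.5), 1.4)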
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] end - end - - - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. 
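# NOTE (editorial sketch, not part of the patch): "squeezing" the first two
# node dimensions reinterprets the same memory without copying. The kernel
# uses `PtrArray` with `StaticInt` sizes so the shape stays a compile-time
# constant; a plain-Julia analogue (hypothetical sizes) is `reshape`:
n, nvars = 4, 5
u = rand(n, n, n, nvars)                 # [i, j, k, v]
u_squeezed = reshape(u, n^2, n, nvars)   # [ij, k, v], shares memory with `u`
i, j, k, v = 1, 2, 3, 5
@assert u_squeezed[i + n * (j - 1), k, v] == u[i, j, k, v]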
- GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - pv3_avg = 0.5 * (p_ll * v3_rr + p_rr * v3_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = p_avg*v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. 
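# NOTE (editorial sketch, not part of the patch): a minimal 1D scalar version
# of the triangular flux-differencing pattern used below, assuming a
# symmetric two-point flux `f_sym` and a split derivative matrix `D` with
# vanishing diagonal (which is what skipping `ii == i` below relies on).
# One flux evaluation serves both the `(i, ii)` and `(ii, i)` updates.
function flux_differencing_1d!(du, u, D, f_sym)
    n = length(u)
    for i in 1:n, ii in (i + 1):n
        f = f_sym(u[i], u[ii])
        du[i] += D[i, ii] * f
        du[ii] += D[ii, i] * f
    end
    return du
end
# e.g. flux_differencing_1d!(zeros(3), [1.0, 2.0, 3.0],
#                            [0.0 1.0 -1.0; -1.0 0.0 1.0; 1.0 -1.0 0.0],
#                            (a, b) -> 0.5 * (a + b))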
+ for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end end - end # GC.@preserve u_prim + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end -end + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. 
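# NOTE (editorial sketch, not part of the patch): the f1..f5 lines inlined in
# these loops are the pointwise flux of Shima et al.; a standalone
# x-direction version (hypothetical name, primitive-variable tuples,
# igmo = 1 / (gamma - 1)) might read:
function flux_shima_x(prim_ll, prim_rr, igmo)
    rho_ll, v1_ll, v2_ll, v3_ll, p_ll = prim_ll
    rho_rr, v1_rr, v2_rr, v3_rr, p_rr = prim_rr
    rho_avg = 0.5 * (rho_ll + rho_rr)
    v1_avg = 0.5 * (v1_ll + v1_rr)
    v2_avg = 0.5 * (v2_ll + v2_rr)
    v3_avg = 0.5 * (v3_ll + v3_rr)
    p_avg = 0.5 * (p_ll + p_rr)
    kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr)
    pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll)
    f1 = rho_avg * v1_avg
    f2 = f1 * v1_avg + p_avg
    f3 = f1 * v2_avg
    f4 = f1 * v3_avg
    f5 = p_avg * v1_avg * igmo + f1 * kin_avg + pv1_avg
    return (f1, f2, f3, f4, f5)
end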
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end + end + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. + GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + pv3_avg = 0.5 * (p_ll * v3_rr + p_rr * v3_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = p_avg * v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg + + # Add scaled fluxes to RHS + factor_k = alpha * derivative_split[k, kk] + du_reshaped[ij, k, 1] += factor_k * f1 + du_reshaped[ij, k, 2] += factor_k * f2 + du_reshaped[ij, k, 3] += factor_k * f3 + du_reshaped[ij, k, 4] += factor_k * f4 + du_reshaped[ij, k, 5] += factor_k * f5 + + factor_kk = alpha * derivative_split[kk, k] + du_reshaped[ij, kk, 1] += factor_kk * f1 + du_reshaped[ij, kk, 2] += factor_kk * f2 + du_reshaped[ij, kk, 3] += factor_kk * f3 + du_reshaped[ij, kk, 4] += factor_kk * f4 + du_reshaped[ij, kk, 5] += factor_kk * f5 + end + end + end # GC.@preserve u_prim + + # Finally, we add the temporary RHS computed here to the global RHS in the + 
# given `element`. + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, k, element] += du[i, j, k, v] + end +end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, mesh::TreeMesh{3}, @@ -268,341 +268,350 @@ end equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - u_prim[i, j, k, 6] = log(rho) - u_prim[i, j, k, 7] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. 
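# NOTE (editorial sketch, not part of the patch): the logarithmic mean that
# the comments in these loops refer to, written as a standalone function.
# The polynomial branch is the truncated series of
# (y - x) / (log(y) - log(x)) and avoids the 0/0 limit for y ≈ x; 1.0e-4 is
# the switching threshold used in the kernels.
function ln_mean_sketch(x, y)
    z = (y - x)^2 / (x + y)^2
    if z < 1.0e-4
        return (x + y) / (2 + z * (2 / 3 + z * (2 / 5 + 2 / 7 * z)))
    else
        return (y - x) / (log(y) - log(x))
    end
end
# e.g. ln_mean_sketch(1.0, 1.0 + 1.0e-10) ≈ 1.0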
- for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - log_rho_ll = u_prim_permuted[jk, i, 6] - log_p_ll = u_prim_permuted[jk, i, 7] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - log_rho_rr = u_prim_permuted[jk, ii, 6] - log_p_rr = u_prim_permuted[jk, ii, 7] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. 
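# NOTE (editorial sketch, not part of the patch): precomputing log(rho) and
# log(p) once per node pays off because the logs of products needed by the
# logarithmic means reduce to sums of the stored logs, removing `log` calls
# from the hot loops:
rho_ll, p_rr = 1.3, 0.7
log_rho_ll, log_p_rr = log(rho_ll), log(p_rr)
@assert log_rho_ll + log_p_rr ≈ log(rho_ll * p_rr)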
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p + u_prim[i, j, k, 6] = log(rho) + u_prim[i, j, k, 7] = log(p) end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - log_rho_ll = u_prim[i, j, k, 6] - log_p_ll = u_prim[i, j, k, 7] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - log_rho_rr = u_prim[i, jj, k, 6] - log_p_rr = u_prim[i, jj, k, 7] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] end - end - - - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. 
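# NOTE (editorial sketch, not part of the patch): a numerical check of the
# algebraic identity quoted in the comments above, with
# inv_ln_mean(a, b) = (log(b) - log(a)) / (b - a), the reciprocal of the
# logarithmic mean (the example picks distinct arguments):
inv_ln_mean_sketch(a, b) = (log(b) - log(a)) / (b - a)
rho_ll, p_ll, rho_rr, p_rr = 1.1, 0.9, 1.4, 1.2
lhs = inv_ln_mean_sketch(rho_ll / p_ll, rho_rr / p_rr)
rhs = p_ll * p_rr * inv_ln_mean_sketch(rho_ll * p_rr, rho_rr * p_ll)
@assert lhs ≈ rhs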
- GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - log_rho_ll = u_prim_reshaped[ij, k, 6] - log_p_ll = u_prim_reshaped[ij, k, 7] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - log_rho_rr = u_prim_reshaped[ij, kk, 6] - log_p_rr = u_prim_reshaped[ij, kk, 7] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v3_rr + p_rr*v3_ll) - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. 
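# NOTE (editorial sketch, not part of the patch): a standalone pointwise
# version of the Ranocha flux inlined in the loop below (x-direction,
# hypothetical names, igmo = 1 / (gamma - 1)). For brevity this sketch uses
# the plain means, so it assumes distinct, positive left/right states
# instead of the guarded series-expansion paths of the kernel.
ln_mean_plain(x, y) = (y - x) / (log(y) - log(x))
inv_ln_mean_plain(x, y) = (log(y) - log(x)) / (y - x)

function flux_ranocha_x(prim_ll, prim_rr, igmo)
    rho_ll, v1_ll, v2_ll, v3_ll, p_ll = prim_ll
    rho_rr, v1_rr, v2_rr, v3_rr, p_rr = prim_rr
    rho_mean = ln_mean_plain(rho_ll, rho_rr)
    inv_rho_p_mean = p_ll * p_rr * inv_ln_mean_plain(rho_ll * p_rr, rho_rr * p_ll)
    v1_avg = 0.5 * (v1_ll + v1_rr)
    v2_avg = 0.5 * (v2_ll + v2_rr)
    v3_avg = 0.5 * (v3_ll + v3_rr)
    p_avg = 0.5 * (p_ll + p_rr)
    velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr)
    f1 = rho_mean * v1_avg
    f2 = f1 * v1_avg + p_avg
    f3 = f1 * v2_avg
    f4 = f1 * v3_avg
    f5 = f1 * (velocity_square_avg + inv_rho_p_mean * igmo) +
         0.5 * (p_ll * v1_rr + p_rr * v1_ll)
    return (f1, f2, f3, f4, f5)
end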
+ for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + log_rho_ll = u_prim_permuted[jk, i, 6] + log_p_ll = u_prim_permuted[jk, i, 7] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + log_rho_rr = u_prim_permuted[jk, ii, 6] + log_p_rr = u_prim_permuted[jk, ii, 7] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end end - end # GC.@preserve u_prim + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. 
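# NOTE (editorial sketch, not part of the patch): each kernel finishes by
# scattering the stack-allocated accumulator back into the global RHS,
# transposing [i, j, k, v] into [v, i, j, k]; a plain-Julia analogue with
# hypothetical sizes:
n, nvars, element = 4, 5, 1
_du = zeros(nvars, n, n, n, 2)   # [v, i, j, k, element]
du_local = rand(n, n, n, nvars)  # [i, j, k, v]
for v in 1:nvars, k in 1:n, j in 1:n, i in 1:n
    _du[v, i, j, k, element] += du_local[i, j, k, v]
end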
- @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end -end + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. + for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + log_rho_ll = u_prim[i, j, k, 6] + log_p_ll = u_prim[i, j, k, 7] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + log_rho_rr = u_prim[i, jj, k, 6] + log_p_rr = u_prim[i, jj, k, 7] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end + end + + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. 
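# NOTE (editorial sketch, not part of the patch): `GC.@preserve` below keeps
# `u_prim` rooted while raw-pointer views (`PtrArray(pointer(...), ...)`) of
# its memory are alive; a minimal Base-only analogue:
x = [1.0, 2.0, 3.0]
GC.@preserve x begin
    p = pointer(x)
    @assert unsafe_load(p, 2) == 2.0
end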
+ GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + log_rho_ll = u_prim_reshaped[ij, k, 6] + log_p_ll = u_prim_reshaped[ij, k, 7] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + log_rho_rr = u_prim_reshaped[ij, kk, 6] + log_p_rr = u_prim_reshaped[ij, kk, 7] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * + ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v3_rr + p_rr * v3_ll) + + # Add scaled fluxes to RHS + factor_k = alpha * derivative_split[k, kk] + du_reshaped[ij, k, 1] += factor_k * f1 + du_reshaped[ij, k, 2] += factor_k * f2 + du_reshaped[ij, k, 3] += factor_k * f3 + du_reshaped[ij, k, 4] += factor_k * f4 + du_reshaped[ij, k, 5] += factor_k * f5 + + factor_kk = alpha * derivative_split[kk, k] + du_reshaped[ij, kk, 1] += factor_kk * f1 + du_reshaped[ij, kk, 2] += factor_kk * f2 + du_reshaped[ij, kk, 3] += factor_kk * f3 + du_reshaped[ij, kk, 4] += factor_kk * f4 + du_reshaped[ij, kk, 5] += factor_kk * f5 + end + end + end # GC.@preserve u_prim + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. 
+ @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, k, element] += du[i, j, k, v] + end +end diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index d3a47cb06be..d6d74637021 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This file collects all methods that have been updated to work with parabolic systems of equations # @@ -12,74 +13,93 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). -function rhs_parabolic!(du, u, t, mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, +function rhs_parabolic!(du, u, t, mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) - @unpack u_transformed, gradients, flux_viscous = cache_parabolic - - # Convert conservative variables to a form more suitable for viscous flux calculations - @trixi_timeit timer() "transform variables" transform_variables!( - u_transformed, u, mesh, equations_parabolic, dg, parabolic_scheme, cache, cache_parabolic) - - # Compute the gradients of the transformed variables - @trixi_timeit timer() "calculate gradient" calc_gradient!( - gradients, u_transformed, t, mesh, equations_parabolic, boundary_conditions_parabolic, dg, - cache, cache_parabolic) - - # Compute and store the viscous fluxes - @trixi_timeit timer() "calculate viscous fluxes" calc_viscous_fluxes!( - flux_viscous, gradients, u_transformed, mesh, equations_parabolic, dg, cache, cache_parabolic) - - # The remainder of this function is essentially a regular rhs! for parabolic equations (i.e., it - # computes the divergence of the viscous fluxes) - # - # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have - # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the - # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the - # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it - # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* - # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we - # do not need to recreate the existing data structure only with a different name, and c) we do not - # need to interpolate solutions *and* gradients to the surfaces. 
-
-  # TODO: parabolic; reconsider current data structure reuse strategy
-
-  # Reset du
-  @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache)
-
-  # Calculate volume integral
-  @trixi_timeit timer() "volume integral" calc_volume_integral!(
-    du, flux_viscous, mesh, equations_parabolic, dg, cache)
-
-  # Prolong solution to interfaces
-  @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!(
-    cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache)
-
-  # Calculate interface fluxes
-  @trixi_timeit timer() "interface flux" calc_interface_flux!(
-    cache_parabolic.elements.surface_flux_values, mesh, equations_parabolic, dg, cache_parabolic)
-
-  # Prolong solution to boundaries
-  @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!(
-    cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache)
-
-  # Calculate boundary fluxes
-  @trixi_timeit timer() "boundary flux" calc_boundary_flux_divergence!(
-    cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic,
-    dg.surface_integral, dg)
-
-  # TODO: parabolic; extend to mortars
-  @assert nmortars(dg, cache) == 0
-
-  # Calculate surface integrals
-  @trixi_timeit timer() "surface integral" calc_surface_integral!(
-    du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic)
-
-  # Apply Jacobian from mapping to reference element
-  @trixi_timeit timer() "Jacobian" apply_jacobian!(
-    du, mesh, equations_parabolic, dg, cache_parabolic)
-
-  return nothing
+    @unpack u_transformed, gradients, flux_viscous = cache_parabolic
+
+    # Convert conservative variables to a form more suitable for viscous flux calculations
+    @trixi_timeit timer() "transform variables" begin
+        transform_variables!(u_transformed, u, mesh, equations_parabolic,
+                             dg, parabolic_scheme, cache, cache_parabolic)
+    end
+
+    # Compute the gradients of the transformed variables
+    @trixi_timeit timer() "calculate gradient" begin
+        calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic,
+                       boundary_conditions_parabolic, dg, cache, cache_parabolic)
+    end
+
+    # Compute and store the viscous fluxes
+    @trixi_timeit timer() "calculate viscous fluxes" begin
+        calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh,
+                             equations_parabolic, dg, cache, cache_parabolic)
+    end
+
+    # The remainder of this function is essentially a regular rhs! for parabolic
+    # equations (i.e., it computes the divergence of the viscous fluxes)
+    #
+    # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have
+    # been computed and stored in `flux_viscous`. In the following, we *reuse* (abuse) the
+    # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the
+    # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it
+    # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values*
+    # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we
+    # do not need to recreate the existing data structure only with a different name, and c) we do not
+    # need to interpolate solutions *and* gradients to the surfaces.
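+    # For example, after `prolong2interfaces!` below, `interfaces.u[1, v, j, k, interface]`
+    # holds the normal component of the viscous flux of the left neighbor element at
+    # the interface node `(j, k)` instead of the solution value.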
+ + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, + mesh, equations_parabolic, + dg.surface_integral, dg) + end + + # TODO: parabolic; extend to mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing end # Transform solution variables prior to taking the gradient @@ -88,580 +108,716 @@ end function transform_variables!(u_transformed, u, mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations_parabolic, dg, i, j, k, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, equations_parabolic) - set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, j, k, element) + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations_parabolic, dg, i, j, k, element) + u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, + equations_parabolic) + set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, + i, j, k, element) + end end - end end # This is the version used when calculating the divergence of the viscous fluxes function calc_volume_integral!(du, flux_viscous, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, dg::DGSEM, cache) - @unpack derivative_dhat = dg.basis - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous - - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, k, element) - flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, k, element) - flux_3_node = get_node_vars(flux_viscous_z, 
equations_parabolic, dg, i, j, k, element) - - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, equations_parabolic, dg, ii, j, k, element) - end - - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, equations_parabolic, dg, i, jj, k, element) - end - - for kk in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[kk, k], flux_3_node, equations_parabolic, dg, i, j, kk, element) - end + @unpack derivative_dhat = dg.basis + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, + k, element) + flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, + k, element) + flux_3_node = get_node_vars(flux_viscous_z, equations_parabolic, dg, i, j, + k, element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, + equations_parabolic, dg, ii, j, k, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, + equations_parabolic, dg, i, jj, k, element) + end + + for kk in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[kk, k], flux_3_node, + equations_parabolic, dg, i, j, kk, element) + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache_parabolic, flux_viscous, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack interfaces = cache_parabolic - @unpack orientations = interfaces - - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous - - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, j, k, interface] = flux_viscous_x[v, nnodes(dg), j, k, left_element] - interfaces.u[2, v, j, k, interface] = flux_viscous_x[v, 1, j, k, right_element] - end - elseif orientations[interface] == 2 - # interface in y-direction - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, i, k, interface] = flux_viscous_y[v, i, nnodes(dg), k, left_element] - interfaces.u[2, v, i, k, interface] = flux_viscous_y[v, i, 1, k, right_element] - end - else # if orientations[interface] == 3 - # interface in z-direction - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
- interfaces.u[1, v, i, j, interface] = flux_viscous_z[v, i, j, nnodes(dg), left_element] - interfaces.u[2, v, i, j, interface] = flux_viscous_z[v, i, j, 1, right_element] - end + @unpack interfaces = cache_parabolic + @unpack orientations = interfaces + + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for k in eachnode(dg), j in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, j, k, interface] = flux_viscous_x[v, nnodes(dg), j, + k, left_element] + interfaces.u[2, v, j, k, interface] = flux_viscous_x[v, 1, j, k, + right_element] + end + elseif orientations[interface] == 2 + # interface in y-direction + for k in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, i, k, interface] = flux_viscous_y[v, i, nnodes(dg), + k, left_element] + interfaces.u[2, v, i, k, interface] = flux_viscous_y[v, i, 1, k, + right_element] + end + else # if orientations[interface] == 3 + # interface in z-direction + for j in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, i, j, interface] = flux_viscous_z[v, i, j, + nnodes(dg), + left_element] + interfaces.u[2, v, i, j, interface] = flux_viscous_z[v, i, j, 1, + right_element] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{3}, equations_parabolic, dg::DG, cache_parabolic) - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for j in eachnode(dg), i in eachnode(dg) - # Get precomputed fluxes at interfaces - flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, - dg, i, j, interface) - - # Compute interface flux as mean of left and right viscous fluxes - # TODO: parabolic; only BR1 at the moment - flux = 0.5 * (flux_ll + flux_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] - end + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 
2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for j in eachnode(dg), i in eachnode(dg) + # Get precomputed fluxes at interfaces + flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, + dg, i, j, interface) + + # Compute interface flux as mean of left and right viscous fluxes + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (flux_ll + flux_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function prolong2boundaries!(cache_parabolic, flux_viscous, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack boundaries = cache_parabolic - @unpack orientations, neighbor_sides = boundaries - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous - - @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, j, k, boundary] = flux_viscous_x[v, nnodes(dg), j, k, element] - end - else # Element in +x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, j, k, boundary] = flux_viscous_x[v, 1, j, k, element] - end - end - elseif orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, i, k, boundary] = flux_viscous_y[v, i, nnodes(dg), k, element] - end - else - # element in +y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, i, k, boundary] = flux_viscous_y[v, i, 1, k, element] - end - end - else # if orientations[boundary] == 3 - # boundary in z-direction - if neighbor_sides[boundary] == 1 - # element in -z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
- boundaries.u[1, v, i, j, boundary] = flux_viscous_z[v, i, j, nnodes(dg), element] + @unpack boundaries = cache_parabolic + @unpack orientations, neighbor_sides = boundaries + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + + @threaded for boundary in eachboundary(dg, cache_parabolic) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for k in eachnode(dg), j in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, j, k, boundary] = flux_viscous_x[v, nnodes(dg), + j, k, element] + end + else # Element in +x direction of boundary + for k in eachnode(dg), j in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, j, k, boundary] = flux_viscous_x[v, 1, j, k, + element] + end + end + elseif orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for k in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, i, k, boundary] = flux_viscous_y[v, i, + nnodes(dg), k, + element] + end + else + # element in +y direction of boundary + for k in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, i, k, boundary] = flux_viscous_y[v, i, 1, k, + element] + end + end + else # if orientations[boundary] == 3 + # boundary in z-direction + if neighbor_sides[boundary] == 1 + # element in -z direction of boundary + for j in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, i, j, boundary] = flux_viscous_z[v, i, j, + nnodes(dg), + element] + end + else + # element in +z direction of boundary + for j in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, i, j, boundary] = flux_viscous_z[v, i, j, 1, + element] + end + end end - else - # element in +z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
- boundaries.u[2, v, i, j, boundary] = flux_viscous_z[v, i, j, 1, element] - end - end end - end - return nothing + return nothing end - function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) - gradients_x, gradients_y, gradients_z = gradients - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous # output arrays - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - # Get solution and gradients - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, element) - gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, k, element) - gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, k, element) - gradients_3_node = get_node_vars(gradients_z, equations_parabolic, dg, i, j, k, element) - - # Calculate viscous flux and store each component for later use - flux_viscous_node_x = flux(u_node, (gradients_1_node, gradients_2_node, gradients_3_node), 1, equations_parabolic) - flux_viscous_node_y = flux(u_node, (gradients_1_node, gradients_2_node, gradients_3_node), 2, equations_parabolic) - flux_viscous_node_z = flux(u_node, (gradients_1_node, gradients_2_node, gradients_3_node), 3, equations_parabolic) - set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, i, j, k, element) - set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, i, j, k, element) - set_node_vars!(flux_viscous_z, flux_viscous_node_z, equations_parabolic, dg, i, j, k, element) + gradients_x, gradients_y, gradients_z = gradients + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous # output arrays + + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + # Get solution and gradients + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, + element) + gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, + k, element) + gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, + k, element) + gradients_3_node = get_node_vars(gradients_z, equations_parabolic, dg, i, j, + k, element) + + # Calculate viscous flux and store each component for later use + flux_viscous_node_x = flux(u_node, + (gradients_1_node, gradients_2_node, + gradients_3_node), 1, equations_parabolic) + flux_viscous_node_y = flux(u_node, + (gradients_1_node, gradients_2_node, + gradients_3_node), 2, equations_parabolic) + flux_viscous_node_z = flux(u_node, + (gradients_1_node, gradients_2_node, + gradients_3_node), 3, equations_parabolic) + set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, + i, j, k, element) + set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, + i, j, k, element) + set_node_vars!(flux_viscous_z, flux_viscous_node_z, equations_parabolic, dg, + i, j, k, element) + end end - end end - # TODO: parabolic; decide if we should keep this. 
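# This helper maps the six face directions of a 3D element (1/2: -x/+x, 3/4: -y/+y,
# 5/6: -z/+z) to the unit vector of the corresponding coordinate axis, without the
# outward sign (hence "unsigned").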
function get_unsigned_normal_vector_3d(direction) - if direction > 6 || direction < 1 - error("Direction = $direction; in 3D, direction should be 1, 2, 3, 4, 5, or 6.") - end - if direction == 1 || direction == 2 - return SVector(1.0, 0.0, 0.0) - elseif direction == 3 || direction == 4 - return SVector(0.0, 1.0, 0.0) - else - return SVector(0.0, 0.0, 1.0) - end + if direction > 6 || direction < 1 + error("Direction = $direction; in 3D, direction should be 1, 2, 3, 4, 5, or 6.") + end + if direction == 1 || direction == 2 + return SVector(1.0, 0.0, 0.0) + elseif direction == 3 || direction == 4 + return SVector(0.0, 1.0, 0.0) + else + return SVector(0.0, 0.0, 1.0) + end end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - return nothing +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - return nothing + return nothing end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[5], - equations_parabolic, surface_integral, dg, cache, - 5, firsts[5], lasts[5]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[6], - equations_parabolic, surface_integral, dg, cache, - 6, firsts[6], lasts[6]) +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary 
fluxes in each direction + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, dg, + cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, dg, + cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[3], + equations_parabolic, surface_integral, dg, + cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, dg, + cache, + 4, firsts[4], lasts[4]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[5], + equations_parabolic, surface_integral, dg, + cache, + 5, firsts[5], lasts[5]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[6], + equations_parabolic, surface_integral, dg, + cache, + 6, firsts[6], lasts[6]) end - -function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{<:Any,5}, t, +function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{ + <:Any, + 5 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for j in eachnode(dg), i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - - # TODO: revisit if we want more general boundary treatments. - # This assumes the gradient numerical flux at the boundary is the gradient variable, - # which is consistent with BR1, LDG. - flux_inner = u_inner - - x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j, boundary) - flux = boundary_condition(flux_inner, u_inner, get_unsigned_normal_vector_3d(direction), - x, t, Gradient(), equations_parabolic) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, j, direction, neighbor] = flux[v] - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for j in eachnode(dg), i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j, + boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. 
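+            # For example, a Dirichlet-type boundary condition may simply return its
+            # prescribed boundary state from its `Gradient()` variant at this point.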
+ flux_inner = u_inner + + x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j, + boundary) + flux = boundary_condition(flux_inner, u_inner, + get_unsigned_normal_vector_3d(direction), + x, t, Gradient(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[5], - equations_parabolic, surface_integral, dg, cache, - 5, firsts[5], lasts[5]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[6], - equations_parabolic, surface_integral, dg, cache, - 6, firsts[6], lasts[6]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, + dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, + dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[3], + equations_parabolic, surface_integral, + dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, + dg, cache, + 4, firsts[4], lasts[4]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[5], + equations_parabolic, surface_integral, + dg, cache, + 5, firsts[5], lasts[5]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[6], + equations_parabolic, surface_integral, + dg, cache, + 6, 
firsts[6], lasts[6])
end

-function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{<:Any,5}, t,
+function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{
+                                                                                         <:Any,
+                                                                                         5
+                                                                                         },
+                                                     t,
                                                      boundary_condition,
                                                      equations_parabolic::AbstractEquationsParabolic,
                                                      surface_integral, dg::DG, cache,
-                                                     direction, first_boundary, last_boundary)
-  @unpack surface_flux = surface_integral
-
-  # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction")
-  # of the viscous flux, as computed in `prolong2boundaries!`
-  @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries
-
-  @threaded for boundary in first_boundary:last_boundary
-    # Get neighboring element
-    neighbor = neighbor_ids[boundary]
-
-    for j in eachnode(dg), i in eachnode(dg)
-      # Get viscous boundary fluxes
-      flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j, boundary)
-      if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right
-        flux_inner = flux_ll
-      else # Element is on the right, boundary on the left
-        flux_inner = flux_rr
-      end
-
-      x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j, boundary)
-
-      # TODO: add a field in `cache.boundaries` for gradient information. UPDATE THIS COMMENT
-      # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information.
-      # This currently works with Dirichlet/Neuman boundary conditions for LaplaceDiffusion3D and
-      # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion3D as of 2022-6-27.
-      # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
-      flux = boundary_condition(flux_inner, nothing, get_unsigned_normal_vector_3d(direction),
-                                x, t, Divergence(), equations_parabolic)
-
-      # Copy flux to left and right element storage
-      for v in eachvariable(equations_parabolic)
-        surface_flux_values[v, i, j, direction, neighbor] = flux[v]
-      end
+                                                     direction, first_boundary,
+                                                     last_boundary)
+    @unpack surface_flux = surface_integral
+
+    # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction")
+    # of the viscous flux, as computed in `prolong2boundaries!`
+    @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries
+
+    @threaded for boundary in first_boundary:last_boundary
+        # Get neighboring element
+        neighbor = neighbor_ids[boundary]
+
+        for j in eachnode(dg), i in eachnode(dg)
+            # Get viscous boundary fluxes
+            flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j,
+                                                     boundary)
+            if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right
+                flux_inner = flux_ll
+            else # Element is on the right, boundary on the left
+                flux_inner = flux_rr
+            end
+
+            x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j,
+                                boundary)
+
+            # TODO: add a field in `cache.boundaries` for gradient information. UPDATE THIS COMMENT
+            # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information.
+            # This currently works with Dirichlet/Neumann boundary conditions for LaplaceDiffusion3D and
+            # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion3D as of 2022-6-27.
+            # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
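+            # Here, `Divergence()` dispatches to the divergence variant of the
+            # boundary condition, i.e., the returned value is a boundary datum for
+            # the viscous *flux* (whose interior value is `flux_inner`) rather than
+            # for the gradient variables.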
+ flux = boundary_condition(flux_inner, nothing, + get_unsigned_normal_vector_3d(direction), + x, t, Divergence(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end - # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{3}, equations_parabolic, boundary_conditions_parabolic, dg::DG, cache, cache_parabolic) + gradients_x, gradients_y, gradients_z = gradients - gradients_x, gradients_y, gradients_z = gradients - - # Reset du - @trixi_timeit timer() "reset gradients" begin - reset_du!(gradients_x, dg, cache) - reset_du!(gradients_y, dg, cache) - reset_du!(gradients_z, dg, cache) - end - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - @unpack derivative_dhat = dg.basis - @threaded for element in eachelement(dg, cache) - - # Calculate volume terms in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, element) + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + reset_du!(gradients_z, dg, cache) + end - for ii in eachnode(dg) - multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], u_node, equations_parabolic, dg, ii, j, k, element) + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + @unpack derivative_dhat = dg.basis + @threaded for element in eachelement(dg, cache) + + # Calculate volume terms in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], + u_node, equations_parabolic, dg, ii, j, + k, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], + u_node, equations_parabolic, dg, i, jj, + k, element) + end + + for kk in eachnode(dg) + multiply_add_to_node_vars!(gradients_z, derivative_dhat[kk, k], + u_node, equations_parabolic, dg, i, j, + kk, element) + end + end end + end - for jj in eachnode(dg) - multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, equations_parabolic, dg, i, jj, k, element) - end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) + end - for kk in eachnode(dg) - multiply_add_to_node_vars!(gradients_z, derivative_dhat[kk, k], u_node, equations_parabolic, dg, i, j, kk, element) + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + @unpack surface_flux_values = cache_parabolic.elements + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for j in eachnode(dg), i in 
eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, dg, i, j, + interface) + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + end + end end - end end - end - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - @unpack surface_flux_values = cache_parabolic.elements - @unpack neighbor_ids, orientations = cache_parabolic.interfaces + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) + end - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, - equations_parabolic, dg, i, j, interface) - flux = 0.5 * (u_ll + u_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] - end - end + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradients!(cache_parabolic, t, boundary_conditions_parabolic, + mesh, equations_parabolic, + dg.surface_integral, dg) end - end - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; mortars - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache_parabolic.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). 
- factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations_parabolic) - # surface at -x - gradients_x[v, 1, l, m, element] = ( - gradients_x[v, 1, l, m, element] - surface_flux_values[v, l, m, 1, element] * factor_1) - - # surface at +x - gradients_x[v, nnodes(dg), l, m, element] = ( - gradients_x[v, nnodes(dg), l, m, element] + surface_flux_values[v, l, m, 2, element] * factor_2) - - # surface at -y - gradients_y[v, l, 1, m, element] = ( - gradients_y[v, l, 1, m, element] - surface_flux_values[v, l, m, 3, element] * factor_1) - - # surface at +y - gradients_y[v, l, nnodes(dg), m, element] = ( - gradients_y[v, l, nnodes(dg), m, element] + surface_flux_values[v, l, m, 4, element] * factor_2) - - # surface at -z - gradients_z[v, l, m, 1, element] = ( - gradients_z[v, l, m, 1, element] - surface_flux_values[v, l, m, 5, element] * factor_1) - - # surface at +z - gradients_z[v, l, m, nnodes(dg), element] = ( - gradients_z[v, l, m, nnodes(dg), element] + surface_flux_values[v, l, m, 6, element] * factor_2) + + # TODO: parabolic; mortars + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache_parabolic.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations_parabolic) + # surface at -x + gradients_x[v, 1, l, m, element] = (gradients_x[v, 1, l, m, + element] - + surface_flux_values[v, l, m, 1, + element] * + factor_1) + + # surface at +x + gradients_x[v, nnodes(dg), l, m, element] = (gradients_x[v, + nnodes(dg), + l, m, + element] + + surface_flux_values[v, + l, + m, + 2, + element] * + factor_2) + + # surface at -y + gradients_y[v, l, 1, m, element] = (gradients_y[v, l, 1, m, + element] - + surface_flux_values[v, l, m, 3, + element] * + factor_1) + + # surface at +y + gradients_y[v, l, nnodes(dg), m, element] = (gradients_y[v, l, + nnodes(dg), + m, + element] + + surface_flux_values[v, + l, + m, + 4, + element] * + factor_2) + + # surface at -z + gradients_z[v, l, m, 1, element] = (gradients_z[v, l, m, 1, + element] - + surface_flux_values[v, l, m, 5, + element] * + factor_1) + + # surface at +z + gradients_z[v, l, m, nnodes(dg), element] = (gradients_z[v, l, m, + nnodes(dg), + element] + + surface_flux_values[v, + l, + m, + 6, + element] * + factor_2) + end + end end - end end - end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin - apply_jacobian!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian!(gradients_z, mesh, equations_parabolic, dg, cache_parabolic) - end + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, + cache_parabolic) + apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, 
+ cache_parabolic) + apply_jacobian_parabolic!(gradients_z, mesh, equations_parabolic, dg, + cache_parabolic) + end - return nothing + return nothing end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache_parabolic(mesh::TreeMesh{3}, equations_hyperbolic::AbstractEquations, +function create_cache_parabolic(mesh::TreeMesh{3}, + equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, + uEltype) - n_vars = nvariables(equations_hyperbolic) - n_nodes = nnodes(elements) - n_elements = nelements(elements) - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_nodes, n_elements) - gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) - flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + n_vars = nvariables(equations_hyperbolic) + n_nodes = nnodes(elements) + n_elements = nelements(elements) + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - # cache = (; elements, interfaces, boundaries, mortars) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + # cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) - # Add specialized parts of the cache required to compute the mortars etc. - # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the mortars etc. + # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) - return cache + return cache end - # Needed to *not* flip the sign of the inverse Jacobian. # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
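# Consequently, the viscous terms enter the right-hand side with a positive sign,
# and `factor` below is the inverse Jacobian itself, not its negative as in the
# corresponding routine for the hyperbolic part.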
-function apply_jacobian!(du, mesh::TreeMesh{3}, - equations::AbstractEquationsParabolic, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] +function apply_jacobian_parabolic!(du, mesh::TreeMesh{3}, + equations::AbstractEquationsParabolic, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = cache.elements.inverse_jacobian[element] - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end end - end - return nothing + return nothing end - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_parallel.jl b/src/solvers/dgsem_tree/dg_parallel.jl index 7ca4bc159ee..c614fe0d0e6 100644 --- a/src/solvers/dgsem_tree/dg_parallel.jl +++ b/src/solvers/dgsem_tree/dg_parallel.jl @@ -3,27 +3,32 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize MPI data structures. This works for both the # `TreeMesh` and the `P4estMesh` and is dimension-agnostic. -function init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, n_dims, nvars, n_nodes, uEltype) - data_size = nvars * n_nodes^(n_dims - 1) - n_small_elements = 2^(n_dims-1) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) - for index in 1:length(mpi_neighbor_interfaces) - mpi_send_buffers[index] = Vector{uEltype}(undef, length(mpi_neighbor_interfaces[index]) * data_size + - length(mpi_neighbor_mortars[index]) * n_small_elements * 2 * data_size) - mpi_recv_buffers[index] = Vector{uEltype}(undef, length(mpi_neighbor_interfaces[index]) * data_size + - length(mpi_neighbor_mortars[index]) * n_small_elements * 2 * data_size) - end +function init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, n_dims, + nvars, n_nodes, uEltype) + data_size = nvars * n_nodes^(n_dims - 1) + n_small_elements = 2^(n_dims - 1) + mpi_send_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) + for index in 1:length(mpi_neighbor_interfaces) + mpi_send_buffers[index] = Vector{uEltype}(undef, + length(mpi_neighbor_interfaces[index]) * + data_size + + length(mpi_neighbor_mortars[index]) * + n_small_elements * 2 * data_size) + mpi_recv_buffers[index] = Vector{uEltype}(undef, + length(mpi_neighbor_interfaces[index]) * + data_size + + length(mpi_neighbor_mortars[index]) * + n_small_elements * 2 * data_size) + end - mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) - mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) - return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests + return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests end - - end # muladd diff --git a/src/solvers/dgsem_tree/indicators.jl b/src/solvers/dgsem_tree/indicators.jl index 30d3b2c0448..2eb0af87148 100644 --- a/src/solvers/dgsem_tree/indicators.jl +++ b/src/solvers/dgsem_tree/indicators.jl 
@@ -3,21 +3,21 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent abstract type AbstractIndicator end -function create_cache(typ::Type{IndicatorType}, semi) where {IndicatorType<:AbstractIndicator} - create_cache(typ, mesh_equations_solver_cache(semi)...) +function create_cache(typ::Type{IndicatorType}, + semi) where {IndicatorType <: AbstractIndicator} + create_cache(typ, mesh_equations_solver_cache(semi)...) end -function get_element_variables!(element_variables, indicator::AbstractIndicator, ::VolumeIntegralShockCapturingHG) - element_variables[:indicator_shock_capturing] = indicator.cache.alpha - return nothing +function get_element_variables!(element_variables, indicator::AbstractIndicator, + ::VolumeIntegralShockCapturingHG) + element_variables[:indicator_shock_capturing] = indicator.cache.alpha + return nothing end - - """ IndicatorHennemannGassner(equations::AbstractEquations, basis; alpha_max=0.5, @@ -41,101 +41,103 @@ See also [`VolumeIntegralShockCapturingHG`](@ref). "A provably entropy stable subcell shock capturing approach for high order split form DG" [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -struct IndicatorHennemannGassner{RealT<:Real, Variable, Cache} <: AbstractIndicator - alpha_max::RealT - alpha_min::RealT - alpha_smooth::Bool - variable::Variable - cache::Cache +struct IndicatorHennemannGassner{RealT <: Real, Variable, Cache} <: AbstractIndicator + alpha_max::RealT + alpha_min::RealT + alpha_smooth::Bool + variable::Variable + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorHennemannGassner(equations::AbstractEquations, basis; - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, variable) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - cache = create_cache(IndicatorHennemannGassner, equations, basis) - IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}( - alpha_max, alpha_min, alpha_smooth, variable, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + cache = create_cache(IndicatorHennemannGassner, equations, basis) + IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}(alpha_max, + alpha_min, + alpha_smooth, + variable, + cache) end # this method is used when the indicator is constructed as for AMR function IndicatorHennemannGassner(semi::AbstractSemidiscretization; - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, variable) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - cache = create_cache(IndicatorHennemannGassner, semi) - IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}( - alpha_max, alpha_min, alpha_smooth, variable, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + cache = create_cache(IndicatorHennemannGassner, semi) + IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}(alpha_max, + alpha_min, + alpha_smooth, + variable, + cache) end - function Base.show(io::IO, indicator::IndicatorHennemannGassner) - @nospecialize indicator # reduce precompilation time - - print(io, "IndicatorHennemannGassner(") - print(io, indicator.variable) - print(io, ", alpha_max=", indicator.alpha_max) - print(io, ", alpha_min=", indicator.alpha_min) - print(io, ", alpha_smooth=", indicator.alpha_smooth) - 
print(io, ")") + @nospecialize indicator # reduce precompilation time + + print(io, "IndicatorHennemannGassner(") + print(io, indicator.variable) + print(io, ", alpha_max=", indicator.alpha_max) + print(io, ", alpha_min=", indicator.alpha_min) + print(io, ", alpha_smooth=", indicator.alpha_smooth) + print(io, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorHennemannGassner) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator variable" => indicator.variable, - "max. α" => indicator.alpha_max, - "min. α" => indicator.alpha_min, - "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), - ] - summary_box(io, "IndicatorHennemannGassner", setup) - end -end + @nospecialize indicator # reduce precompilation time + if get(io, :compact, false) + show(io, indicator) + else + setup = [ + "indicator variable" => indicator.variable, + "max. α" => indicator.alpha_max, + "min. α" => indicator.alpha_min, + "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), + ] + summary_box(io, "IndicatorHennemannGassner", setup) + end +end function (indicator_hg::IndicatorHennemannGassner)(u, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack alpha_smooth = indicator_hg - @unpack alpha, alpha_tmp = indicator_hg.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end - - # magic parameters - threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) - parameter_s = log((1 - 0.0001) / 0.0001) - - @threaded for element in eachelement(dg, cache) - # This is dispatched by mesh dimension. - # Use this function barrier and unpack inside to avoid passing closures to - # Polyester.jl with `@batch` (`@threaded`). - # Otherwise, `@threaded` does not work here with Julia ARM on macOS. - # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. - calc_indicator_hennemann_gassner!( - indicator_hg, threshold, parameter_s, u, - element, mesh, equations, dg, cache) - end - - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha -end + @unpack alpha_smooth = indicator_hg + @unpack alpha, alpha_tmp = indicator_hg.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end + # magic parameters + threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + @threaded for element in eachelement(dg, cache) + # This is dispatched by mesh dimension. + # Use this function barrier and unpack inside to avoid passing closures to + # Polyester.jl with `@batch` (`@threaded`). + # Otherwise, `@threaded` does not work here with Julia ARM on macOS. + # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. 
+ calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, + element, mesh, equations, dg, cache) + end + + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end + + return alpha +end """ IndicatorLöhner (equivalent to IndicatorLoehner) @@ -159,59 +161,60 @@ and `basis` if this indicator should be used for shock capturing. [doi: 10.1016/0045-7825(87)90098-3](https://doi.org/10.1016/0045-7825(87)90098-3) - http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node59.html#SECTION05163100000000000000 """ -struct IndicatorLöhner{RealT<:Real, Variable, Cache} <: AbstractIndicator - f_wave::RealT # TODO: Taal documentation - variable::Variable - cache::Cache +struct IndicatorLöhner{RealT <: Real, Variable, Cache} <: AbstractIndicator + f_wave::RealT # TODO: Taal documentation + variable::Variable + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorLöhner(equations::AbstractEquations, basis; - f_wave=0.2, variable) - cache = create_cache(IndicatorLöhner, equations, basis) - IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, cache) + f_wave = 0.2, variable) + cache = create_cache(IndicatorLöhner, equations, basis) + IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, + cache) end # this method is used when the indicator is constructed as for AMR function IndicatorLöhner(semi::AbstractSemidiscretization; - f_wave=0.2, variable) - cache = create_cache(IndicatorLöhner, semi) - IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, cache) + f_wave = 0.2, variable) + cache = create_cache(IndicatorLöhner, semi) + IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, + cache) end - function Base.show(io::IO, indicator::IndicatorLöhner) - @nospecialize indicator # reduce precompilation time + @nospecialize indicator # reduce precompilation time - print(io, "IndicatorLöhner(") - print(io, "f_wave=", indicator.f_wave, ", variable=", indicator.variable, ")") + print(io, "IndicatorLöhner(") + print(io, "f_wave=", indicator.f_wave, ", variable=", indicator.variable, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorLöhner) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator variable" => indicator.variable, - "f_wave" => indicator.f_wave, - ] - summary_box(io, "IndicatorLöhner", setup) - end + @nospecialize indicator # reduce precompilation time + + if get(io, :compact, false) + show(io, indicator) + else + setup = [ + "indicator variable" => indicator.variable, + "f_wave" => indicator.f_wave, + ] + summary_box(io, "IndicatorLöhner", setup) + end end const IndicatorLoehner = IndicatorLöhner # dirty Löhner estimate, direction by direction, assuming constant nodes -@inline function local_löhner_estimate(um::Real, u0::Real, up::Real, löhner::IndicatorLöhner) - num = abs(up - 2 * u0 + um) - den = abs(up - u0) + abs(u0-um) + löhner.f_wave * (abs(up) + 2 * abs(u0) + abs(um)) - return num / den +@inline function local_löhner_estimate(um::Real, u0::Real, up::Real, + löhner::IndicatorLöhner) + num = abs(up - 2 * u0 + um) + den = abs(up - u0) + abs(u0 - um) + + löhner.f_wave * (abs(up) + 2 * abs(u0) + abs(um)) + return num / den end - - """ IndicatorMax(equations::AbstractEquations, basis; variable) IndicatorMax(semi::AbstractSemidiscretization; variable) 
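# Illustrative usage sketch for the indicator API above (assumed elixir-style
# setup; the equation, the basis degree, and the `density_pressure`,
# `flux_ranocha`, `flux_lax_friedrichs` choices are examples, not requirements
# of this file):
using Trixi

equations = CompressibleEulerEquations2D(1.4)
basis = LobattoLegendreBasis(3)

# Construct the shock-capturing indicator via the keyword interface shown above
indicator_sc = IndicatorHennemannGassner(equations, basis;
                                         alpha_max = 0.5,
                                         alpha_min = 0.001,
                                         alpha_smooth = true,
                                         variable = density_pressure)

# The indicator's per-element blending factor alpha weights a high-order DG
# volume flux against a stabilizing first-order FV flux
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                 volume_flux_dg = flux_ranocha,
                                                 volume_flux_fv = flux_lax_friedrichs)
solver = DGSEM(basis, flux_lax_friedrichs, volume_integral)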
@@ -220,44 +223,43 @@ A simple indicator returning the maximum of `variable` in an element. When constructed to be used for AMR, pass the `semi`. Pass the `equations`, and `basis` if this indicator should be used for shock capturing. """ -struct IndicatorMax{Variable, Cache<:NamedTuple} <: AbstractIndicator - variable::Variable - cache::Cache +struct IndicatorMax{Variable, Cache <: NamedTuple} <: AbstractIndicator + variable::Variable + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorMax(equations::AbstractEquations, basis; variable) - cache = create_cache(IndicatorMax, equations, basis) - IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) + cache = create_cache(IndicatorMax, equations, basis) + IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) end # this method is used when the indicator is constructed as for AMR function IndicatorMax(semi::AbstractSemidiscretization; variable) - cache = create_cache(IndicatorMax, semi) - return IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) + cache = create_cache(IndicatorMax, semi) + return IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) end - function Base.show(io::IO, indicator::IndicatorMax) - @nospecialize indicator # reduce precompilation time + @nospecialize indicator # reduce precompilation time - print(io, "IndicatorMax(") - print(io, "variable=", indicator.variable, ")") + print(io, "IndicatorMax(") + print(io, "variable=", indicator.variable, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorMax) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator variable" => indicator.variable, - ] - summary_box(io, "IndicatorMax", setup) - end + @nospecialize indicator # reduce precompilation time + + if get(io, :compact, false) + show(io, indicator) + else + setup = [ + "indicator variable" => indicator.variable, + ] + summary_box(io, "IndicatorMax", setup) + end end """ @@ -293,127 +295,132 @@ If `alpha_continuous == false`, the blending factor is set to `alpha = 0` for go This is an experimental feature and may change in future releases. 
""" -struct IndicatorNeuralNetwork{IndicatorType, RealT<:Real, Variable, Chain, Cache} <: AbstractIndicator - indicator_type::IndicatorType - alpha_max::RealT - alpha_min::RealT - alpha_smooth::Bool - alpha_continuous::Bool - alpha_amr::Bool - variable::Variable - network::Chain - cache::Cache +struct IndicatorNeuralNetwork{IndicatorType, RealT <: Real, Variable, Chain, Cache} <: + AbstractIndicator + indicator_type::IndicatorType + alpha_max::RealT + alpha_min::RealT + alpha_smooth::Bool + alpha_continuous::Bool + alpha_amr::Bool + variable::Variable + network::Chain + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorNeuralNetwork(equations::AbstractEquations, basis; indicator_type, - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, - alpha_continuous=true, - alpha_amr=false, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, + alpha_continuous = true, + alpha_amr = false, variable, network) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - IndicatorType = typeof(indicator_type) - cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, equations, basis) - IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), typeof(network), typeof(cache)}( - indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, - network, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + IndicatorType = typeof(indicator_type) + cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, equations, basis) + IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), + typeof(network), typeof(cache)}(indicator_type, alpha_max, + alpha_min, alpha_smooth, + alpha_continuous, alpha_amr, + variable, + network, cache) end # this method is used when the indicator is constructed as for AMR function IndicatorNeuralNetwork(semi::AbstractSemidiscretization; indicator_type, - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, - alpha_continuous=true, - alpha_amr=true, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, + alpha_continuous = true, + alpha_amr = true, variable, network) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - IndicatorType = typeof(indicator_type) - cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, semi) - IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), typeof(network), typeof(cache)}( - indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, - network, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + IndicatorType = typeof(indicator_type) + cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, semi) + IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), + typeof(network), typeof(cache)}(indicator_type, alpha_max, + alpha_min, alpha_smooth, + alpha_continuous, alpha_amr, + variable, + network, cache) end - function Base.show(io::IO, indicator::IndicatorNeuralNetwork) - @nospecialize indicator # reduce precompilation time - - print(io, "IndicatorNeuralNetwork(") - print(io, indicator.indicator_type) - print(io, ", alpha_max=", indicator.alpha_max) - print(io, ", alpha_min=", indicator.alpha_min) - print(io, ", alpha_smooth=", indicator.alpha_smooth) - print(io, ", alpha_continuous=", indicator.alpha_continuous) - print(io, indicator.variable) - print(io, ")") + @nospecialize indicator # reduce precompilation time + + print(io, "IndicatorNeuralNetwork(") + print(io, 
indicator.indicator_type) + print(io, ", alpha_max=", indicator.alpha_max) + print(io, ", alpha_min=", indicator.alpha_min) + print(io, ", alpha_smooth=", indicator.alpha_smooth) + print(io, ", alpha_continuous=", indicator.alpha_continuous) + print(io, indicator.variable) + print(io, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorNeuralNetwork) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator type" => indicator.indicator_type, - "max. α" => indicator.alpha_max, - "min. α" => indicator.alpha_min, - "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), - "continuous α" => (indicator.alpha_continuous ? "yes" : "no"), - "indicator variable" => indicator.variable, - ] - summary_box(io, "IndicatorNeuralNetwork", setup) - end -end - - -# Convert probability for troubled cell to indicator value for shockcapturing/AMR -@inline function probability_to_indicator(probability_troubled_cell, alpha_continuous, alpha_amr, - alpha_min, alpha_max) - # Initialize indicator to zero - alpha_element = zero(probability_troubled_cell) + @nospecialize indicator # reduce precompilation time - if alpha_continuous && !alpha_amr - # Set good cells to 0 and troubled cells to continuous value of the network prediction - if probability_troubled_cell > 0.5 - alpha_element = probability_troubled_cell + if get(io, :compact, false) + show(io, indicator) else - alpha_element = zero(probability_troubled_cell) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) + setup = [ + "indicator type" => indicator.indicator_type, + "max. α" => indicator.alpha_max, + "min. α" => indicator.alpha_min, + "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), + "continuous α" => (indicator.alpha_continuous ? 
"yes" : "no"), + "indicator variable" => indicator.variable, + ] + summary_box(io, "IndicatorNeuralNetwork", setup) end +end - # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha - alpha_element *= alpha_max - elseif !alpha_continuous && !alpha_amr - # Set good cells to 0 and troubled cells to 1 - if probability_troubled_cell > 0.5 - alpha_element = alpha_max - else - alpha_element = zero(alpha_max) +# Convert probability for troubled cell to indicator value for shockcapturing/AMR +@inline function probability_to_indicator(probability_troubled_cell, alpha_continuous, + alpha_amr, + alpha_min, alpha_max) + # Initialize indicator to zero + alpha_element = zero(probability_troubled_cell) + + if alpha_continuous && !alpha_amr + # Set good cells to 0 and troubled cells to continuous value of the network prediction + if probability_troubled_cell > 0.5 + alpha_element = probability_troubled_cell + else + alpha_element = zero(probability_troubled_cell) + end + + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end + + # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha + alpha_element *= alpha_max + elseif !alpha_continuous && !alpha_amr + # Set good cells to 0 and troubled cells to 1 + if probability_troubled_cell > 0.5 + alpha_element = alpha_max + else + alpha_element = zero(alpha_max) + end + elseif alpha_amr + # The entire continuous output of the neural network is used for AMR + alpha_element = probability_troubled_cell + + # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha + alpha_element *= alpha_max end - elseif alpha_amr - # The entire continuous output of the neural network is used for AMR - alpha_element = probability_troubled_cell - - # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha - alpha_element *= alpha_max - end - return alpha_element + return alpha_element end - """ NeuralNetworkPerssonPeraire @@ -449,5 +456,4 @@ Indicator type for creating an `IndicatorNeuralNetwork` indicator. See also: [`IndicatorNeuralNetwork`](@ref) """ struct NeuralNetworkCNN end - end # @muladd diff --git a/src/solvers/dgsem_tree/indicators_1d.jl b/src/solvers/dgsem_tree/indicators_1d.jl index c1a88161245..e722584bb2e 100644 --- a/src/solvers/dgsem_tree/indicators_1d.jl +++ b/src/solvers/dgsem_tree/indicators_1d.jl @@ -3,397 +3,414 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) +function create_cache(::Type{IndicatorHennemannGassner}, + equations::AbstractEquations{1}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, + equations::AbstractEquations{1}, dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. -@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, + u, element, mesh::AbstractMesh{1}, equations, dg, cache) - @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_hg.cache - - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for i in 1:nnodes(dg) - total_energy += modal[i]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i]^2 - end - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) - - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - 
alpha_min - alpha_element = one(alpha_element) - end - - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) + @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_hg.cache + + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_hg.variable(u_local, equations) + end + + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator) + + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for i in 1:nnodes(dg) + total_energy += modal[i]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i]^2 + end + + # Calculate energy in higher modes + if !(iszero(total_energy)) + energy_frac_1 = (total_energy - total_energy_clip1) / total_energy + else + energy_frac_1 = zero(total_energy) + end + if !(iszero(total_energy_clip1)) + energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + else + energy_frac_2 = zero(total_energy_clip1) + end + energy = max(energy_frac_1, energy_frac_2) + + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) + + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) + end + + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end + + # Clip the maximum amount of FV allowed + alpha[element] = min(alpha_max, alpha_element) end # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha -function apply_smoothing!(mesh::Union{TreeMesh{1}, P4estMesh{1}}, alpha, alpha_tmp, dg, cache) - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha - - # Loop over interfaces - for interface in eachinterface(dg, cache) - # Get neighboring element ids - left = cache.interfaces.neighbor_ids[1, interface] - right = cache.interfaces.neighbor_ids[2, interface] - - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) - alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) - end +function apply_smoothing!(mesh::Union{TreeMesh{1}, P4estMesh{1}}, alpha, alpha_tmp, dg, + cache) + # Copy alpha values such that smoothing is independent of the element access order + alpha_tmp .= alpha + + # Loop over interfaces + for interface in eachinterface(dg, cache) + # Get neighboring element ids + left = cache.interfaces.neighbor_ids[1, interface] + right = cache.interfaces.neighbor_ids[2, interface] + + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) + alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) + end end - # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorLöhner}, 
equations::AbstractEquations{1}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{1}, + dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - -function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any,3}, +function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any, 3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @assert nnodes(dg) >= 3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" - @unpack alpha, indicator_threaded = löhner.cache - resize!(alpha, nelements(dg, cache)) + @assert nnodes(dg)>=3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" + @unpack alpha, indicator_threaded = löhner.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = löhner.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = löhner.variable(u_local, equations) + end - estimate = zero(real(dg)) - for i in 2:nnodes(dg)-1 - # x direction - u0 = indicator[i ] - up = indicator[i+1] - um = indicator[i-1] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end + estimate = zero(real(dg)) + for i in 2:(nnodes(dg) - 1) + # x direction + u0 = indicator[i] + up = indicator[i + 1] + um = indicator[i - 1] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end - # use the maximum as DG element indicator - alpha[element] = estimate - end + # use the maximum as DG element indicator + alpha[element] = estimate + end - return alpha + return alpha end - # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() +function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{1}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - cache = create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{1}, + 
dg::DGSEM, cache) + cache = create_cache(typ, equations, dg.basis) end - -function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any,3}, +function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any, 3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack alpha, indicator_threaded = indicator_max.cache - resize!(alpha, nelements(dg, cache)) + @unpack alpha, indicator_threaded = indicator_max.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_max.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_max.variable(u_local, equations) + end - alpha[element] = maximum(indicator) - end + alpha[element] = maximum(indicator) + end - return alpha + return alpha end # this method is used when the indicator is constructed as for shock-capturing volume integrals # empty cache is default function create_cache(::Type{<:IndicatorNeuralNetwork}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) - return NamedTuple() + return NamedTuple() end # cache for NeuralNetworkPerssonPeraire-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire}}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + prototype = A(undef, nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) end # cache for NeuralNetworkRayHesthaven-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkRayHesthaven}}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - neighbor_ids = Vector{Int}(undef, 2) + prototype = A(undef, nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + neighbor_ids = Vector{Int}(undef, 2) - return (; alpha, alpha_tmp, indicator_threaded, neighbor_ids) + return (; alpha, alpha_tmp, indicator_threaded, neighbor_ids) end # this method is used when the indicator is constructed as for AMR function create_cache(typ::Type{<:IndicatorNeuralNetwork}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) + create_cache(typ, equations, dg.basis) end -function 
(indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})( - u::AbstractArray{<:Any,3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann - - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end - - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_ann.variable(u_local, equations) +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})(u::AbstractArray{ + <:Any, + 3 + }, + mesh, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) end - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator) + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for i in 1:nnodes(dg) - total_energy += modal[i]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i]^2 - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_ann.variable(u_local, equations) + end - # Calculate energy in highest modes - X1 = (total_energy - total_energy_clip1)/total_energy - X2 = (total_energy_clip1 - total_energy_clip2)/total_energy_clip1 - - # There are two versions of the network: - # The first one only takes the highest energy modes as input, the second one also the number of - # nodes. Automatically use the correct input by checking the number of inputs of the network. 
- if size(params(network)[1],2) == 2 - network_input = SVector(X1, X2) - elseif size(params(network)[1],2) == 3 - network_input = SVector(X1, X2, nnodes(dg)) - end + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator) - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for i in 1:nnodes(dg) + total_energy += modal[i]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i]^2 + end - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end + # Calculate energy in highest modes + X1 = (total_energy - total_energy_clip1) / total_energy + X2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + + # There are two versions of the network: + # The first one only takes the highest energy modes as input, the second one also the number of + # nodes. Automatically use the correct input by checking the number of inputs of the network. + if size(params(network)[1], 2) == 2 + network_input = SVector(X1, X2) + elseif size(params(network)[1], 2) == 3 + network_input = SVector(X1, X2, nnodes(dg)) + end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] - return alpha -end + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})( - u::AbstractArray{<:Any,3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end - @unpack alpha, alpha_tmp, indicator_threaded, neighbor_ids = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + return alpha +end - c2e = zeros(Int, length(mesh.tree)) - for element in eachelement(dg, cache) - c2e[cache.elements.cell_ids[element]] = element - end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})(u::AbstractArray{ + <:Any, + 3 + }, + mesh, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, neighbor_ids = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? 
+ # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end + c2e = zeros(Int, length(mesh.tree)) + for element in eachelement(dg, cache) + c2e[cache.elements.cell_ids[element]] = element + end - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - cell_id = cache.elements.cell_ids[element] - - for direction in eachdirection(mesh.tree) - if !has_any_neighbor(mesh.tree, cell_id, direction) - neighbor_ids[direction] = element_id - continue - end - if has_neighbor(mesh.tree, cell_id, direction) - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor - if direction == 1 - neighbor_ids[direction] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - else - neighbor_ids[direction] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - end - else # Cell has same refinement level neighbor - neighbor_ids[direction] = c2e[neighbor_cell_id] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + cell_id = cache.elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + if !has_any_neighbor(mesh.tree, cell_id, direction) + neighbor_ids[direction] = element + continue + end + if has_neighbor(mesh.tree, cell_id, direction) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor + if direction == 1 + neighbor_ids[direction] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + else + neighbor_ids[direction] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + end + else # Cell has same refinement level neighbor + neighbor_ids[direction] = c2e[neighbor_cell_id] + end + else # Cell is small and has large neighbor + parent_id = mesh.tree.parent_ids[cell_id] + neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] + neighbor_ids[direction] = c2e[neighbor_cell_id] + end end - else # Cell is small and has large neighbor - parent_id = mesh.tree.parent_ids[cell_id] - neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] - neighbor_ids[direction] = c2e[neighbor_cell_id] - end - end - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_ann.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_ann.variable(u_local, equations) + end + # Cell average and interface values of the cell + X2 = sum(indicator) / nnodes(dg) + X4 = indicator[1] + X5 = indicator[end] - # Cell average and interface values of the cell - X2 = sum(indicator)/nnodes(dg) - X4 = indicator[1] - X5 = indicator[end] + # Calculate indicator variables from left neighboring cell at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, neighbor_ids[1]) + indicator[i] = indicator_ann.variable(u_local, equations) + end + X1 = sum(indicator) / nnodes(dg) - # Calculate indicator variables from left neighboring cell at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, neighbor_ids[1]) - indicator[i] = indicator_ann.variable(u_local, equations) + # Calculate 
indicator variables from right neighboring cell at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, neighbor_ids[2]) + indicator[i] = indicator_ann.variable(u_local, equations) + end + X3 = sum(indicator) / nnodes(dg) + network_input = SVector(X1, X2, X3, X4, X5) + + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] + + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) end - X1 = sum(indicator)/nnodes(dg) - # Calculate indicator variables from right neighboring cell at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, neighbor_ids[2]) - indicator[i] = indicator_ann.variable(u_local, equations) + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) end - X3 = sum(indicator)/nnodes(dg) - network_input = SVector(X1, X2, X3, X4, X5) - - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] - - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha + return alpha end - end # @muladd diff --git a/src/solvers/dgsem_tree/indicators_2d.jl b/src/solvers/dgsem_tree/indicators_2d.jl index eb08657563b..085cb71ad0c 100644 --- a/src/solvers/dgsem_tree/indicators_2d.jl +++ b/src/solvers/dgsem_tree/indicators_2d.jl @@ -3,544 +3,579 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) +function create_cache(::Type{IndicatorHennemannGassner}, + equations::AbstractEquations{2}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, + equations::AbstractEquations{2}, dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. 
-@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, + u, element, mesh::AbstractMesh{2}, equations, dg, cache) - @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, - modal_tmp1_threaded = indicator_hg.cache - - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for j in 1:nnodes(dg), i in 1:nnodes(dg) - total_energy += modal[i, j]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j]^2 - end - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) - - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) - end - - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) -end + @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, + modal_tmp1_threaded = indicator_hg.cache + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] -# Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha -function apply_smoothing!(mesh::Union{TreeMesh{2}, P4estMesh{2}}, alpha, alpha_tmp, dg, cache) - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha - - # Loop over interfaces - for interface in eachinterface(dg, cache) - # Get neighboring element ids - left = cache.interfaces.neighbor_ids[1, interface] - right = cache.interfaces.neighbor_ids[2, interface] - - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) - alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) - end - - # Loop over L2 mortars - for mortar in eachmortar(dg, cache) - # Get neighboring element ids - lower = cache.mortars.neighbor_ids[1, mortar] - upper = cache.mortars.neighbor_ids[2, mortar] - large = cache.mortars.neighbor_ids[3, mortar] - - # Apply smoothing - alpha[lower] = 
max(alpha_tmp[lower], 0.5 * alpha_tmp[large], alpha[lower]) - alpha[upper] = max(alpha_tmp[upper], 0.5 * alpha_tmp[large], alpha[upper]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower], alpha[large]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper], alpha[large]) - end - - return alpha + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_hg.variable(u_local, equations) + end + + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1) + + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 1), i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i, j]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 2), i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i, j]^2 + end + + # Calculate energy in higher modes + if !(iszero(total_energy)) + energy_frac_1 = (total_energy - total_energy_clip1) / total_energy + else + energy_frac_1 = zero(total_energy) + end + if !(iszero(total_energy_clip1)) + energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + else + energy_frac_2 = zero(total_energy_clip1) + end + energy = max(energy_frac_1, energy_frac_2) + + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) + + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) + end + + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end + + # Clip the maximum amount of FV allowed + alpha[element] = min(alpha_max, alpha_element) end +# Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha +function apply_smoothing!(mesh::Union{TreeMesh{2}, P4estMesh{2}}, alpha, alpha_tmp, dg, + cache) + # Copy alpha values such that smoothing is independent of the element access order + alpha_tmp .= alpha + + # Loop over interfaces + for interface in eachinterface(dg, cache) + # Get neighboring element ids + left = cache.interfaces.neighbor_ids[1, interface] + right = cache.interfaces.neighbor_ids[2, interface] + + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) + alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) + end + + # Loop over L2 mortars + for mortar in eachmortar(dg, cache) + # Get neighboring element ids + lower = cache.mortars.neighbor_ids[1, mortar] + upper = cache.mortars.neighbor_ids[2, mortar] + large = cache.mortars.neighbor_ids[3, mortar] + + # Apply smoothing + alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[large], alpha[lower]) + alpha[upper] = max(alpha_tmp[upper], 0.5 * alpha_tmp[large], alpha[upper]) + alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower], alpha[large]) + alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper], alpha[large]) + end + + return alpha +end - -# this method is used when the 
indicator is constructed as for shock-capturing volume integrals +function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{2}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{2}, + dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - -function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any,4}, +function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any, 4}, mesh, equations, dg::DGSEM, cache; kwargs...) - @assert nnodes(dg) >= 3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" - @unpack alpha, indicator_threaded = löhner.cache - resize!(alpha, nelements(dg, cache)) + @assert nnodes(dg)>=3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" + @unpack alpha, indicator_threaded = löhner.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = löhner.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = löhner.variable(u_local, equations) + end - estimate = zero(real(dg)) - for j in eachnode(dg), i in 2:nnodes(dg)-1 - # x direction - u0 = indicator[i, j] - up = indicator[i+1, j] - um = indicator[i-1, j] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end + estimate = zero(real(dg)) + for j in eachnode(dg), i in 2:(nnodes(dg) - 1) + # x direction + u0 = indicator[i, j] + up = indicator[i + 1, j] + um = indicator[i - 1, j] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end - for j in 2:nnodes(dg)-1, i in eachnode(dg) - # y direction - u0 = indicator[i, j ] - up = indicator[i, j+1] - um = indicator[i, j-1] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end + for j in 2:(nnodes(dg) - 1), i in eachnode(dg) + # y direction + u0 = indicator[i, j] + up = indicator[i, j + 1] + um = indicator[i, j - 1] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end - # use the maximum as DG element indicator - alpha[element] = estimate - end + # use the maximum as DG element indicator + alpha[element] = estimate + end - return alpha + return alpha end - - # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() +function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{2}, + 
basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - cache = create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{2}, + dg::DGSEM, cache) + cache = create_cache(typ, equations, dg.basis) end - -function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any,4}, +function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any, 4}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack alpha, indicator_threaded = indicator_max.cache - resize!(alpha, nelements(dg, cache)) + @unpack alpha, indicator_threaded = indicator_max.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_max.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_max.variable(u_local, equations) + end - alpha[element] = maximum(indicator) - end + alpha[element] = maximum(indicator) + end - return alpha + return alpha end # this method is used when the indicator is constructed as for shock-capturing volume integrals # empty cache is default function create_cache(::Type{IndicatorNeuralNetwork}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - return NamedTuple() + return NamedTuple() end # cache for NeuralNetworkPerssonPeraire-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire}}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} + @assert nnodes(basis)>=4 "Indicator only works for nnodes >= 4 (polydeg > 2)" - @assert nnodes(basis) >= 4 "Indicator only works for nnodes >= 4 (polydeg > 2)" + prototype = A(undef, nnodes(basis), nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = A(undef, nnodes(basis), nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) end # cache for 
NeuralNetworkRayHesthaven-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkRayHesthaven}}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis), nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + prototype = A(undef, nnodes(basis), nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - network_input = Vector{Float64}(undef, 15) - neighbor_ids= Array{Int64}(undef, 8) - neighbor_mean = Array{Float64}(undef, 4, 3) + network_input = Vector{Float64}(undef, 15) + neighbor_ids = Array{Int64}(undef, 8) + neighbor_mean = Array{Float64}(undef, 4, 3) - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, network_input, neighbor_ids, neighbor_mean) end # cache for NeuralNetworkCNN-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkCNN}}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis), nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - n_cnn = 4 - nodes,_ = gauss_lobatto_nodes_weights(nnodes(basis)) - cnn_nodes,_= gauss_lobatto_nodes_weights(n_cnn) - vandermonde = polynomial_interpolation_matrix(nodes, cnn_nodes) - network_input = Array{Float32}(undef, n_cnn, n_cnn, 1, 1) - - return (; alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, network_input) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} + + prototype = A(undef, nnodes(basis), nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + n_cnn = 4 + nodes, _ = gauss_lobatto_nodes_weights(nnodes(basis)) + cnn_nodes, _ = gauss_lobatto_nodes_weights(n_cnn) + vandermonde = polynomial_interpolation_matrix(nodes, cnn_nodes) + network_input = Array{Float32}(undef, n_cnn, n_cnn, 1, 1) + + return (; alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, + network_input) end # this method is used when the indicator is constructed as for AMR function create_cache(typ::Type{<:IndicatorNeuralNetwork}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) + create_cache(typ, equations, dg.basis) end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})(u, + mesh::TreeMesh{ + 2 + }, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? 
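All of these `create_cache` methods use the same threading pattern: one scratch array per Julia thread, allocated once and then fetched via `Threads.threadid()` inside `@threaded` loops. A minimal standalone sketch of the idea, using plain `Threads.@threads` and a made-up workload instead of Trixi.jl's `@threaded` macro:

using Base.Threads

n = 4  # nodes per direction (hypothetical)
# one scratch matrix per thread, allocated up front so the hot loop stays allocation-free
scratch = [Matrix{Float64}(undef, n, n) for _ in 1:nthreads()]

results = zeros(100)
@threads for element in 1:100
    buf = scratch[threadid()]  # thread-local workspace
    buf .= element             # stand-in for the real per-element work
    results[element] = maximum(buf)
end

Indexing by `threadid()` relies on the loop body not yielding to the scheduler, which is one reason the Polyester.jl-based `@threaded` macro is used in the actual code.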
+ # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})( - u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache; kwargs...) + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_ann.variable(u_local, equations) + end - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 1), i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i, j]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 2), i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i, j]^2 + end + total_energy_clip3 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 3), i in 1:(nnodes(dg) - 3) + total_energy_clip3 += modal[i, j]^2 + end - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_ann.variable(u_local, equations) + # Calculate energy in higher modes and polynomial degree for the network input + X1 = (total_energy - total_energy_clip1) / total_energy + X2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + X3 = (total_energy_clip2 - total_energy_clip3) / total_energy_clip2 + X4 = nnodes(dg) + network_input = SVector(X1, X2, X3, X4) + + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] + + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) end - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for j in 1:nnodes(dg), i in 
1:nnodes(dg) - total_energy += modal[i, j]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j]^2 + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) end - total_energy_clip2 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j]^2 - end - total_energy_clip3 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-3), i in 1:(nnodes(dg)-3) - total_energy_clip3 += modal[i, j]^2 - end - - # Calculate energy in higher modes and polynomial degree for the network input - X1 = (total_energy - total_energy_clip1)/total_energy - X2 = (total_energy_clip1 - total_energy_clip2)/total_energy_clip1 - X3 = (total_energy_clip2 - total_energy_clip3)/total_energy_clip2 - X4 = nnodes(dg) - network_input = SVector(X1, X2, X3, X4) - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] - - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end - - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha + return alpha end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})(u, + mesh::TreeMesh{ + 2 + }, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, network_input, neighbor_ids, neighbor_mean = indicator_ann.cache #X, network_input + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})( - u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache; kwargs...) + c2e = zeros(Int, length(mesh.tree)) + for element in eachelement(dg, cache) + c2e[cache.elements.cell_ids[element]] = element + end - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + X = Array{Float64}(undef, 3, nelements(dg, cache)) - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, network_input, neighbor_ids, neighbor_mean = indicator_ann.cache #X, network_input - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? 
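The network inputs X1–X3 computed above are the fractions of modal energy carried by the highest, second-highest, and third-highest modes. A small worked example with made-up 4×4 modal coefficients (polydeg 3) illustrates the decay behavior the network keys on:

# hypothetical modal coefficients of the indicator variable
modal = [1.0   0.3   0.1   0.05;
         0.2   0.1   0.05  0.01;
         0.1   0.05  0.01  0.005;
         0.05  0.01  0.005 0.001]

total_energy       = sum(abs2, modal)                   # all modes
total_energy_clip1 = sum(abs2, @view modal[1:3, 1:3])   # drop the highest mode
total_energy_clip2 = sum(abs2, @view modal[1:2, 1:2])   # drop the two highest
total_energy_clip3 = sum(abs2, @view modal[1:1, 1:1])   # drop the three highest

X1 = (total_energy - total_energy_clip1) / total_energy
X2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1
X3 = (total_energy_clip2 - total_energy_clip3) / total_energy_clip2
# for smooth data these fractions decay rapidly; near a discontinuity they stay O(1)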
- resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - c2e = zeros(Int, length(mesh.tree)) - for element in eachelement(dg, cache) - c2e[cache.elements.cell_ids[element]] = element - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_ann.variable(u_local, equations) + end - X = Array{Float64}(undef, 3, nelements(dg, cache)) + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1) + # Save linear modal coefficients for the network input + X[1, element] = modal[1, 1] + X[2, element] = modal[1, 2] + X[3, element] = modal[2, 1] + end - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + cell_id = cache.elements.cell_ids[element] + + network_input[1] = X[1, element] + network_input[2] = X[2, element] + network_input[3] = X[3, element] + + for direction in eachdirection(mesh.tree) + if direction == 1 # -x + dir = 4 + elseif direction == 2 # +x + dir = 1 + elseif direction == 3 # -y + dir = 3 + elseif direction == 4 # +y + dir = 2 + end - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_ann.variable(u_local, equations) - end + # If no neighbor exists and current cell is not small + if !has_any_neighbor(mesh.tree, cell_id, direction) + network_input[3 * dir + 1] = X[1, element] + network_input[3 * dir + 2] = X[2, element] + network_input[3 * dir + 3] = X[3, element] + continue + end - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - # Save linear modal coefficients for the network input - X[1,element] = modal[1,1] - X[2,element] = modal[1,2] - X[3,element] = modal[2,1] - end - - @threaded for element in eachelement(dg, cache) - cell_id = cache.elements.cell_ids[element] - - network_input[1] = X[1,element] - network_input[2] = X[2,element] - network_input[3] = X[3,element] - - for direction in eachdirection(mesh.tree) - if direction == 1 # -x - dir = 4 - elseif direction == 2 # +x - dir = 1 - elseif direction == 3 # -y - dir = 3 - elseif direction == 4 # +y - dir = 2 - end - - # Of no neighbor exists and current cell is not small - if !has_any_neighbor(mesh.tree, cell_id, direction) - network_input[3*dir+1] = X[1, element] - network_input[3*dir+2] = X[2, element] - network_input[3*dir+3] = X[3, element] - continue - end - - # Get Input data from neighbors - if has_neighbor(mesh.tree, cell_id, direction) - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor - # Mean over 4 neighbor cells - neighbor_ids[1] = mesh.tree.child_ids[1, neighbor_cell_id] - neighbor_ids[2] = mesh.tree.child_ids[2, neighbor_cell_id] - neighbor_ids[3] = mesh.tree.child_ids[3, neighbor_cell_id] - neighbor_ids[4] = mesh.tree.child_ids[4, neighbor_cell_id]
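For this Ray–Hesthaven-style input the 15-entry vector holds the element's own three linear modal coefficients in slots 1–3, followed by three coefficients per remapped direction `dir` in slots `3*dir + 1` to `3*dir + 3`. A sketch of that layout with dummy data (the helper is hypothetical, not part of Trixi.jl):

function fill_network_input!(network_input, own, neighbor_means)
    network_input[1:3] .= own  # modal[1,1], modal[1,2], modal[2,1]
    for dir in 1:4             # remapped -x/+x/-y/+y neighbors
        network_input[(3 * dir + 1):(3 * dir + 3)] .= neighbor_means[dir]
    end
    return network_input
end

network_input = zeros(15)
own = [0.9, 0.1, 0.2]                    # dummy modal coefficients
neighbor_means = [rand(3) for _ in 1:4]  # dummy per-direction means
fill_network_input!(network_input, own, neighbor_means)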
- - for i in 1:4 - if has_children(mesh.tree, neighbor_ids[i]) - neighbor_ids5 = c2e[mesh.tree.child_ids[1, neighbor_ids[i]]] - neighbor_ids6 = c2e[mesh.tree.child_ids[2, neighbor_ids[i]]] - neighbor_ids7 = c2e[mesh.tree.child_ids[3, neighbor_ids[i]]] - neighbor_ids8 = c2e[mesh.tree.child_ids[4, neighbor_ids[i]]] - - neighbor_mean[i,1] = (X[1,neighbor_ids5] + X[1,neighbor_ids6] + X[1,neighbor_ids7] + X[1,neighbor_ids8])/4 - neighbor_mean[i,2] = (X[2,neighbor_ids5] + X[2,neighbor_ids6] + X[2,neighbor_ids7] + X[2,neighbor_ids8])/4 - neighbor_mean[i,3] = (X[3,neighbor_ids5] + X[3,neighbor_ids6] + X[3,neighbor_ids7] + X[3,neighbor_ids8])/4 - else - neighbor_id = c2e[neighbor_ids[i]] - neighbor_mean[i,1] = X[1,neighbor_id] - neighbor_mean[i,2] = X[2,neighbor_id] - neighbor_mean[i,3] = X[3,neighbor_id] + # Get Input data from neighbors + if has_neighbor(mesh.tree, cell_id, direction) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor + # Mean over 4 neighbor cells + neighbor_ids[1] = mesh.tree.child_ids[1, neighbor_cell_id] + neighbor_ids[2] = mesh.tree.child_ids[2, neighbor_cell_id] + neighbor_ids[3] = mesh.tree.child_ids[3, neighbor_cell_id] + neighbor_ids[4] = mesh.tree.child_ids[4, neighbor_cell_id] + + for i in 1:4 + if has_children(mesh.tree, neighbor_ids[i]) + neighbor_ids5 = c2e[mesh.tree.child_ids[1, neighbor_ids[i]]] + neighbor_ids6 = c2e[mesh.tree.child_ids[2, neighbor_ids[i]]] + neighbor_ids7 = c2e[mesh.tree.child_ids[3, neighbor_ids[i]]] + neighbor_ids8 = c2e[mesh.tree.child_ids[4, neighbor_ids[i]]] + + neighbor_mean[i, 1] = (X[1, neighbor_ids5] + + X[1, neighbor_ids6] + + X[1, neighbor_ids7] + + X[1, neighbor_ids8]) / 4 + neighbor_mean[i, 2] = (X[2, neighbor_ids5] + + X[2, neighbor_ids6] + + X[2, neighbor_ids7] + + X[2, neighbor_ids8]) / 4 + neighbor_mean[i, 3] = (X[3, neighbor_ids5] + + X[3, neighbor_ids6] + + X[3, neighbor_ids7] + + X[3, neighbor_ids8]) / 4 + else + neighbor_id = c2e[neighbor_ids[i]] + neighbor_mean[i, 1] = X[1, neighbor_id] + neighbor_mean[i, 2] = X[2, neighbor_id] + neighbor_mean[i, 3] = X[3, neighbor_id] + end + end + network_input[3 * dir + 1] = (neighbor_mean[1, 1] + + neighbor_mean[2, 1] + + neighbor_mean[3, 1] + + neighbor_mean[4, 1]) / 4 + network_input[3 * dir + 2] = (neighbor_mean[1, 2] + + neighbor_mean[2, 2] + + neighbor_mean[3, 2] + + neighbor_mean[4, 2]) / 4 + network_input[3 * dir + 3] = (neighbor_mean[1, 3] + + neighbor_mean[2, 3] + + neighbor_mean[3, 3] + + neighbor_mean[4, 3]) / 4 + + else # Cell has same refinement level neighbor + neighbor_id = c2e[neighbor_cell_id] + network_input[3 * dir + 1] = X[1, neighbor_id] + network_input[3 * dir + 2] = X[2, neighbor_id] + network_input[3 * dir + 3] = X[3, neighbor_id] + end + else # Cell is small and has large neighbor + parent_id = mesh.tree.parent_ids[cell_id] + neighbor_id = c2e[mesh.tree.neighbor_ids[direction, parent_id]] + + network_input[3 * dir + 1] = X[1, neighbor_id] + network_input[3 * dir + 2] = X[2, neighbor_id] + network_input[3 * dir + 3] = X[3, neighbor_id] end - end - network_input[3*dir+1] = (neighbor_mean[1,1] + neighbor_mean[2,1] + neighbor_mean[3,1] + neighbor_mean[4,1])/4 - network_input[3*dir+2] = (neighbor_mean[1,2] + neighbor_mean[2,2] + neighbor_mean[3,2] + neighbor_mean[4,2])/4 - network_input[3*dir+3] = (neighbor_mean[1,3] + neighbor_mean[2,3] + neighbor_mean[3,3] + neighbor_mean[4,3])/4 - - else # Cell has same refinement level neighbor - neighbor_id = c2e[neighbor_cell_id] - 
network_input[3*dir+1] = X[1,neighbor_id] - network_input[3*dir+2] = X[2,neighbor_id] - network_input[3*dir+3] = X[3,neighbor_id] end - else # Cell is small and has large neighbor - parent_id = mesh.tree.parent_ids[cell_id] - neighbor_id = c2e[mesh.tree.neighbor_ids[direction, parent_id]] - - network_input[3*dir+1] = X[1,neighbor_id] - network_input[3*dir+2] = X[2,neighbor_id] - network_input[3*dir+3] = X[3,neighbor_id] - end - end - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) + end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end - return alpha + return alpha end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkCNN})(u, mesh::TreeMesh{2}, + equations, dg::DGSEM, + cache; kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, network_input = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkCNN})( - u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache; kwargs...) - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - @unpack alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, network_input = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? 
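All three network indicators funnel the network output through `probability_to_indicator`, which is defined alongside the indicator types elsewhere in Trixi.jl. As a rough sketch of what such a mapping can look like (illustrative only, not the package's verbatim implementation):

# illustrative: turn a troubled-cell probability into a blending factor
function probability_to_indicator_sketch(probability, alpha_continuous,
                                         alpha_amr, alpha_min, alpha_max)
    if alpha_continuous && !alpha_amr
        # continuous blending: use the probability itself, with clipping
        alpha = probability < alpha_min ? zero(probability) : probability
        return min(alpha, alpha_max)
    else
        # binary decision: either fully troubled or not at all
        return probability > 0.5 ? alpha_max : zero(probability)
    end
end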
- resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_ann.variable(u_local, equations) + end + + # Interpolate nodal data to 4x4 LGL nodes + for j in 1:4, i in 1:4 + acc = zero(eltype(indicator)) + for jj in eachnode(dg), ii in eachnode(dg) + acc += vandermonde[i, ii] * indicator[ii, jj] * vandermonde[j, jj] + end + network_input[i, j, 1, 1] = acc + end - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_ann.variable(u_local, equations) + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) end - # Interpolate nodal data to 4x4 LGL nodes - for j in 1:4, i in 1:4 - acc = zero(eltype(indicator)) - for jj in eachnode(dg), ii in eachnode(dg) - acc += vandermonde[i,ii] * indicator[ii,jj] * vandermonde[j,jj] - end - network_input[i,j,1,1] = acc + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) end - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] - - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end - - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha + return alpha end - end # @muladd diff --git a/src/solvers/dgsem_tree/indicators_3d.jl b/src/solvers/dgsem_tree/indicators_3d.jl index c1e7aee886a..69041ed1298 100644 --- a/src/solvers/dgsem_tree/indicators_3d.jl +++ b/src/solvers/dgsem_tree/indicators_3d.jl @@ -3,242 +3,250 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations{3}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_tmp2_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, modal_tmp2_threaded) +function create_cache(::Type{IndicatorHennemannGassner}, + equations::AbstractEquations{3}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_tmp2_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, + modal_tmp2_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::AbstractEquations{3}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, + equations::AbstractEquations{3}, dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. 
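The comment above describes a general Julia pattern: instead of letting the threaded loop body capture variables in a closure, the body is moved into its own function that receives everything as arguments. A minimal sketch with generic names, unrelated to the actual indicator code:

# without the barrier, `data` and `out` would be captured in a closure
# handed to the threading macro; as arguments they stay type-stable
@inline function process_element!(out, data, element)
    out[element] = sum(view(data, :, element))
    return nothing
end

function process_all!(out, data)
    Threads.@threads for element in axes(data, 2)
        process_element!(out, data, element)  # function barrier
    end
    return out
end

process_all!(zeros(10), rand(5, 10))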
-@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, + u, element, mesh::AbstractMesh{3}, equations, dg, cache) - @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, - modal_tmp1_threaded, modal_tmp2_threaded = indicator_hg.cache - - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - modal_tmp2 = modal_tmp2_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - indicator[i, j, k] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1, modal_tmp2) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for k in 1:nnodes(dg), j in 1:nnodes(dg), i in 1:nnodes(dg) - total_energy += modal[i, j, k]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for k in 1:(nnodes(dg)-1), j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j, k]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for k in 1:(nnodes(dg)-2), j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j, k]^2 - end - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) - - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) - end - - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) -end + @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, + modal_tmp1_threaded, modal_tmp2_threaded = indicator_hg.cache + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + modal_tmp2 = modal_tmp2_threaded[Threads.threadid()] -function apply_smoothing!(mesh::Union{TreeMesh{3}, P4estMesh{3}}, alpha, alpha_tmp, dg, cache) - - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha - - # Loop over interfaces - for interface in eachinterface(dg, cache) - # Get neighboring element ids - left = cache.interfaces.neighbor_ids[1, interface] - right = cache.interfaces.neighbor_ids[2, interface] - - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) - alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) - end - - # Loop over L2 mortars 
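The blending factor follows the Hennemann–Gassner construction: a logistic function of the modal energy, alpha = 1 / (1 + exp(-s/T * (E - T))) with threshold T and sharpness s, afterwards clipped by alpha_min and alpha_max. A quick numerical check with made-up values (the sharpness choice below is a common one, not necessarily the value used here):

threshold = 0.003                          # hypothetical energy threshold T
parameter_s = log((1 - 0.0001) / 0.0001)   # sharpness s, ~9.21
for energy in (1.0e-5, threshold, 0.1)
    alpha = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))
    println("energy = $energy -> alpha = $alpha")
end
# far below the threshold -> alpha ~ 0 (pure DG)
# at the threshold        -> alpha = 0.5
# far above the threshold -> alpha ~ 1 (then clipped to alpha_max)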
- for mortar in eachmortar(dg, cache) - # Get neighboring element ids - lower_left = cache.mortars.neighbor_ids[1, mortar] - lower_right = cache.mortars.neighbor_ids[2, mortar] - upper_left = cache.mortars.neighbor_ids[3, mortar] - upper_right = cache.mortars.neighbor_ids[4, mortar] - large = cache.mortars.neighbor_ids[5, mortar] - - # Apply smoothing - alpha[lower_left] = max(alpha_tmp[lower_left], 0.5 * alpha_tmp[large], alpha[lower_left]) - alpha[lower_right] = max(alpha_tmp[lower_right], 0.5 * alpha_tmp[large], alpha[lower_right]) - alpha[upper_left] = max(alpha_tmp[upper_left], 0.5 * alpha_tmp[large], alpha[upper_left]) - alpha[upper_right] = max(alpha_tmp[upper_right], 0.5 * alpha_tmp[large], alpha[upper_right]) - - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_left], alpha[large]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_right], alpha[large]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_left], alpha[large]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_right], alpha[large]) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + indicator[i, j, k] = indicator_hg.variable(u_local, equations) + end -end + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1, modal_tmp2) + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for k in 1:nnodes(dg), j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j, k]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for k in 1:(nnodes(dg) - 1), j in 1:(nnodes(dg) - 1), i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i, j, k]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for k in 1:(nnodes(dg) - 2), j in 1:(nnodes(dg) - 2), i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i, j, k]^2 + end -# this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{3}, basis::LobattoLegendreBasis) + # Calculate energy in higher modes + if !(iszero(total_energy)) + energy_frac_1 = (total_energy - total_energy_clip1) / total_energy + else + energy_frac_1 = zero(total_energy) + end + if !(iszero(total_energy_clip1)) + energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + else + energy_frac_2 = zero(total_energy_clip1) + end + energy = max(energy_frac_1, energy_frac_2) - alpha = Vector{real(basis)}() + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) + end - return (; alpha, indicator_threaded) -end + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end -# this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{3}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) + # Clip the maximum amount of FV allowed + alpha[element] = min(alpha_max, alpha_element) end +function 
apply_smoothing!(mesh::Union{TreeMesh{3}, P4estMesh{3}}, alpha, alpha_tmp, dg, + cache) -function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any,5}, - mesh, equations, dg::DGSEM, cache; - kwargs...) - @assert nnodes(dg) >= 3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" - @unpack alpha, indicator_threaded = löhner.cache - resize!(alpha, nelements(dg, cache)) + # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha + # Copy alpha values such that smoothing is independent of the element access order + alpha_tmp .= alpha - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + # Loop over interfaces + for interface in eachinterface(dg, cache) + # Get neighboring element ids + left = cache.interfaces.neighbor_ids[1, interface] + right = cache.interfaces.neighbor_ids[2, interface] - # Calculate indicator variables at Gauss-Lobatto nodes - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - indicator[i, j, k] = löhner.variable(u_local, equations) + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) + alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) end - estimate = zero(real(dg)) - for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)-1 - # x direction - u0 = indicator[i, j, k] - up = indicator[i+1, j, k] - um = indicator[i-1, j, k] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + # Loop over L2 mortars + for mortar in eachmortar(dg, cache) + # Get neighboring element ids + lower_left = cache.mortars.neighbor_ids[1, mortar] + lower_right = cache.mortars.neighbor_ids[2, mortar] + upper_left = cache.mortars.neighbor_ids[3, mortar] + upper_right = cache.mortars.neighbor_ids[4, mortar] + large = cache.mortars.neighbor_ids[5, mortar] + + # Apply smoothing + alpha[lower_left] = max(alpha_tmp[lower_left], 0.5 * alpha_tmp[large], + alpha[lower_left]) + alpha[lower_right] = max(alpha_tmp[lower_right], 0.5 * alpha_tmp[large], + alpha[lower_right]) + alpha[upper_left] = max(alpha_tmp[upper_left], 0.5 * alpha_tmp[large], + alpha[upper_left]) + alpha[upper_right] = max(alpha_tmp[upper_right], 0.5 * alpha_tmp[large], + alpha[upper_right]) + + alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_left], alpha[large]) + alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_right], alpha[large]) + alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_left], alpha[large]) + alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_right], alpha[large]) end +end - for k in eachnode(dg), j in 2:nnodes(dg)-1, i in eachnode(dg) - # y direction - u0 = indicator[i, j, k] - up = indicator[i, j+1, k] - um = indicator[i, j-1, k] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end +# this method is used when the indicator is constructed as for shock-capturing volume integrals +function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{3}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - for k in 2:nnodes(dg)-1, j in eachnode(dg), i in eachnode(dg) - # y direction - u0 = indicator[i, j, k ] - up = indicator[i, j, k+1] - um = indicator[i, j, k-1] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] - # use the maximum
as DG element indicator - alpha[element] = estimate - end + return (; alpha, indicator_threaded) +end - return alpha +# this method is used when the indicator is constructed as for AMR +function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{3}, + dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end +function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any, 5}, + mesh, equations, dg::DGSEM, cache; + kwargs...) + @assert nnodes(dg)>=3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" + @unpack alpha, indicator_threaded = löhner.cache + resize!(alpha, nelements(dg, cache)) + + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + + # Calculate indicator variables at Gauss-Lobatto nodes + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + indicator[i, j, k] = löhner.variable(u_local, equations) + end + + estimate = zero(real(dg)) + for k in eachnode(dg), j in eachnode(dg), i in 2:(nnodes(dg) - 1) + # x direction + u0 = indicator[i, j, k] + up = indicator[i + 1, j, k] + um = indicator[i - 1, j, k] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end + + for k in eachnode(dg), j in 2:(nnodes(dg) - 1), i in eachnode(dg) + # y direction + u0 = indicator[i, j, k] + up = indicator[i, j + 1, k] + um = indicator[i, j - 1, k] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end + + for k in 2:(nnodes(dg) - 1), j in eachnode(dg), i in eachnode(dg) + # z direction + u0 = indicator[i, j, k] + up = indicator[i, j, k + 1] + um = indicator[i, j, k - 1] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end + + # use the maximum as DG element indicator + alpha[element] = estimate + end - return alpha +end -# this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{3}, basis::LobattoLegendreBasis) + return alpha +end - alpha = Vector{real(basis)}() +# this method is used when the indicator is constructed as for shock-capturing volume integrals +function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{3}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{3}, dg::DGSEM, cache) - cache = create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{3}, + dg::DGSEM, cache) + cache = create_cache(typ, equations, dg.basis) end - -function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any,5}, +function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any, 5}, mesh, equations, dg::DGSEM, cache; kwargs...)
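`local_löhner_estimate` is defined next to the indicator type elsewhere in Trixi.jl; it compares the second difference of the indicator variable against its first differences plus a noise filter scaled by the indicator's wave-filtering parameter. A hedged sketch of the classic Löhner-type estimator (illustrative, not the package's exact code, with `f_wave` as the filter parameter):

# Löhner-type smoothness estimate: ~0 for smooth data, O(1) at jumps
function löhner_estimate_sketch(um, u0, up; f_wave = 0.2)
    num = abs(up - 2 * u0 + um)
    den = abs(up - u0) + abs(u0 - um) +
          f_wave * (abs(up) + 2 * abs(u0) + abs(um))
    return num / den
end

löhner_estimate_sketch(1.0, 1.01, 1.02)  # smooth -> 0.0
löhner_estimate_sketch(1.0, 1.0, 2.0)    # kink   -> 0.5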
- @unpack alpha, indicator_threaded = indicator_max.cache - resize!(alpha, nelements(dg, cache)) + @unpack alpha, indicator_threaded = indicator_max.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - indicator[i, j, k] = indicator_max.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + indicator[i, j, k] = indicator_max.variable(u_local, equations) + end - alpha[element] = maximum(indicator) - end + alpha[element] = maximum(indicator) + end - return alpha + return alpha end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/containers_2d.jl b/src/solvers/dgsem_unstructured/containers_2d.jl index f1fda031ee9..13eeaeabffb 100644 --- a/src/solvers/dgsem_unstructured/containers_2d.jl +++ b/src/solvers/dgsem_unstructured/containers_2d.jl @@ -3,42 +3,44 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Container data structure (structure-of-arrays style) for DG elements on curved unstructured mesh -struct UnstructuredElementContainer2D{RealT<:Real, uEltype<:Real} - node_coordinates ::Array{RealT, 4} # [ndims, nnodes, nnodes, nelement] - jacobian_matrix ::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] - inverse_jacobian ::Array{RealT, 3} # [nnodes, nnodes, nelement] - contravariant_vectors::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] - normal_directions ::Array{RealT, 4} # [ndims, nnodes, local sides, nelement] - surface_flux_values ::Array{uEltype, 4} # [variables, nnodes, local sides, elements] +struct UnstructuredElementContainer2D{RealT <: Real, uEltype <: Real} + node_coordinates::Array{RealT, 4} # [ndims, nnodes, nnodes, nelement] + jacobian_matrix::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] + inverse_jacobian::Array{RealT, 3} # [nnodes, nnodes, nelement] + contravariant_vectors::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] + normal_directions::Array{RealT, 4} # [ndims, nnodes, local sides, nelement] + surface_flux_values::Array{uEltype, 4} # [variables, nnodes, local sides, elements] end - # construct an empty curved element container to be filled later with geometries in the # unstructured mesh constructor -function UnstructuredElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - node_coordinates = fill(nan_RealT, (2, n_nodes, n_nodes, capacity)) - jacobian_matrix = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity)) - inverse_jacobian = fill(nan_RealT, (n_nodes, n_nodes, capacity)) - contravariant_vectors = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity)) - normal_directions = fill(nan_RealT, (2, n_nodes, 4, capacity)) - surface_flux_values = fill(nan_uEltype, (n_variables, n_nodes, 4, capacity)) - - return UnstructuredElementContainer2D{RealT, uEltype}(node_coordinates, - jacobian_matrix, - inverse_jacobian, - contravariant_vectors, - normal_directions, 
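Because the container is laid out structure-of-arrays style with the element index last, one element's data can be sliced out contiguously and without copying. For example, with Trixi.jl's internals loaded (capacity, variable count, and node count made up):

elements = UnstructuredElementContainer2D{Float64, Float64}(10, 4, 4)
x = elements.node_coordinates[1, 2, 3, 7]             # x-coordinate of node (2, 3) in element 7 (NaN until initialized)
coords = view(elements.node_coordinates, :, :, :, 7)  # zero-copy 2x4x4 slice of element 7
@assert nelements(elements) == 10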
- surface_flux_values) +function UnstructuredElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) + + node_coordinates = fill(nan_RealT, (2, n_nodes, n_nodes, capacity)) + jacobian_matrix = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity)) + inverse_jacobian = fill(nan_RealT, (n_nodes, n_nodes, capacity)) + contravariant_vectors = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity)) + normal_directions = fill(nan_RealT, (2, n_nodes, 4, capacity)) + surface_flux_values = fill(nan_uEltype, (n_variables, n_nodes, 4, capacity)) + + return UnstructuredElementContainer2D{RealT, uEltype}(node_coordinates, + jacobian_matrix, + inverse_jacobian, + contravariant_vectors, + normal_directions, + surface_flux_values) end - -@inline nelements(elements::UnstructuredElementContainer2D) = size(elements.surface_flux_values, 4) +@inline function nelements(elements::UnstructuredElementContainer2D) + size(elements.surface_flux_values, 4) +end """ eachelement(elements::UnstructuredElementContainer2D) @@ -46,280 +48,292 @@ Return an iterator over the indices that specify the location in relevant data s for the elements in `elements`. In particular, not the elements themselves are returned. """ -@inline eachelement(elements::UnstructuredElementContainer2D) = Base.OneTo(nelements(elements)) +@inline function eachelement(elements::UnstructuredElementContainer2D) + Base.OneTo(nelements(elements)) +end -@inline nvariables(elements::UnstructuredElementContainer2D) = size(elements.surface_flux_values, 1) -@inline nnodes(elements::UnstructuredElementContainer2D) = size(elements.surface_flux_values, 2) +@inline function nvariables(elements::UnstructuredElementContainer2D) + size(elements.surface_flux_values, 1) +end +@inline function nnodes(elements::UnstructuredElementContainer2D) + size(elements.surface_flux_values, 2) +end Base.real(elements::UnstructuredElementContainer2D) = eltype(elements.node_coordinates) -Base.eltype(elements::UnstructuredElementContainer2D) = eltype(elements.surface_flux_values) - +function Base.eltype(elements::UnstructuredElementContainer2D) + eltype(elements.surface_flux_values) +end @inline function get_surface_normal(vec, indices...) 
- # way to extract the normal vector at the surfaces without allocating - surface_vector = SVector(ntuple(j -> vec[j, indices...], 2)) - return surface_vector + # way to extract the normal vector at the surfaces without allocating + surface_vector = SVector(ntuple(j -> vec[j, indices...], 2)) + return surface_vector end function init_elements(mesh::UnstructuredMesh2D, equations, basis, RealT, uEltype) - elements = UnstructuredElementContainer2D{RealT, uEltype}( - mesh.n_elements, nvariables(equations), nnodes(basis)) - init_elements!(elements, mesh, basis) - return elements + elements = UnstructuredElementContainer2D{RealT, uEltype}(mesh.n_elements, + nvariables(equations), + nnodes(basis)) + init_elements!(elements, mesh, basis) + return elements end - function init_elements!(elements::UnstructuredElementContainer2D, mesh, basis) - four_corners = zeros(eltype(mesh.corners), 4, 2) - - # loop through elements and call the correct constructor based on whether the element is curved - for element in eachelement(elements) - if mesh.element_is_curved[element] - init_element!(elements, element, basis.nodes, view(mesh.surface_curves, :, element)) - else # straight sided element - for i in 1:4, j in 1:2 - # pull the (x,y) values of these corners out of the global corners array - four_corners[i, j] = mesh.corners[j, mesh.element_node_ids[i, element]] - end - init_element!(elements, element, basis.nodes, four_corners) + four_corners = zeros(eltype(mesh.corners), 4, 2) + + # loop through elements and call the correct constructor based on whether the element is curved + for element in eachelement(elements) + if mesh.element_is_curved[element] + init_element!(elements, element, basis.nodes, + view(mesh.surface_curves, :, element)) + else # straight sided element + for i in 1:4, j in 1:2 + # pull the (x,y) values of these corners out of the global corners array + four_corners[i, j] = mesh.corners[j, mesh.element_node_ids[i, element]] + end + init_element!(elements, element, basis.nodes, four_corners) + end end - end end - # initialize all the values in the container of a general element (either straight sided or curved) function init_element!(elements, element, nodes, corners_or_surface_curves) + calc_node_coordinates!(elements.node_coordinates, element, nodes, + corners_or_surface_curves) - calc_node_coordinates!(elements.node_coordinates, element, nodes, corners_or_surface_curves) + calc_metric_terms!(elements.jacobian_matrix, element, nodes, + corners_or_surface_curves) - calc_metric_terms!(elements.jacobian_matrix, element, nodes, corners_or_surface_curves) + calc_inverse_jacobian!(elements.inverse_jacobian, element, elements.jacobian_matrix) - calc_inverse_jacobian!(elements.inverse_jacobian, element, elements.jacobian_matrix) + calc_contravariant_vectors!(elements.contravariant_vectors, element, + elements.jacobian_matrix) - calc_contravariant_vectors!(elements.contravariant_vectors, element, elements.jacobian_matrix) + calc_normal_directions!(elements.normal_directions, element, nodes, + corners_or_surface_curves) - calc_normal_directions!(elements.normal_directions, element, nodes, corners_or_surface_curves) - - return elements + return elements end - # generic container for the interior interfaces of an unstructured mesh -struct UnstructuredInterfaceContainer2D{uEltype<:Real} - u ::Array{uEltype, 4} # [primary/secondary, variables, i, interfaces] - start_index ::Vector{Int} # [interfaces] - index_increment ::Vector{Int} # [interfaces] - element_ids ::Array{Int, 2} # [primary/secondary, 
interfaces] - element_side_ids ::Array{Int, 2} # [primary/secondary, interfaces] +struct UnstructuredInterfaceContainer2D{uEltype <: Real} + u::Array{uEltype, 4} # [primary/secondary, variables, i, interfaces] + start_index::Vector{Int} # [interfaces] + index_increment::Vector{Int} # [interfaces] + element_ids::Array{Int, 2} # [primary/secondary, interfaces] + element_side_ids::Array{Int, 2} # [primary/secondary, interfaces] end - # Construct an empty curved interface container to be filled later with neighbour # information in the unstructured mesh constructor -function UnstructuredInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - - nan_uEltype = convert(uEltype, NaN) - - u = fill(nan_uEltype, (2, n_variables, n_nodes, capacity)) - start_index = fill(typemin(Int), capacity) - index_increment = fill(typemin(Int), capacity) - element_ids = fill(typemin(Int), (2, capacity)) - element_side_ids = fill(typemin(Int), (2, capacity)) - - return UnstructuredInterfaceContainer2D{uEltype}( - u, start_index, index_increment, element_ids, element_side_ids) +function UnstructuredInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan_uEltype = convert(uEltype, NaN) + + u = fill(nan_uEltype, (2, n_variables, n_nodes, capacity)) + start_index = fill(typemin(Int), capacity) + index_increment = fill(typemin(Int), capacity) + element_ids = fill(typemin(Int), (2, capacity)) + element_side_ids = fill(typemin(Int), (2, capacity)) + + return UnstructuredInterfaceContainer2D{uEltype}(u, start_index, index_increment, + element_ids, element_side_ids) end - -@inline ninterfaces(interfaces::UnstructuredInterfaceContainer2D) = length(interfaces.start_index) +@inline function ninterfaces(interfaces::UnstructuredInterfaceContainer2D) + length(interfaces.start_index) +end @inline nnodes(interfaces::UnstructuredInterfaceContainer2D) = size(interfaces.u, 3) +function init_interfaces(mesh::UnstructuredMesh2D, + elements::UnstructuredElementContainer2D) + interfaces = UnstructuredInterfaceContainer2D{eltype(elements)}(mesh.n_interfaces, + nvariables(elements), + nnodes(elements)) -function init_interfaces(mesh::UnstructuredMesh2D, elements::UnstructuredElementContainer2D) - - interfaces = UnstructuredInterfaceContainer2D{eltype(elements)}( - mesh.n_interfaces, nvariables(elements), nnodes(elements)) - - # extract and save the appropriate neighbour information from the mesh skeleton - if isperiodic(mesh) - init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, - mesh.n_elements, True()) - else - init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, - mesh.n_elements, False()) - end + # extract and save the appropriate neighbour information from the mesh skeleton + if isperiodic(mesh) + init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, + mesh.n_elements, True()) + else + init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, + mesh.n_elements, False()) + end - return interfaces + return interfaces end - function init_interfaces!(interfaces, edge_information, boundary_names, n_elements, periodic::False) - - n_nodes = nnodes(interfaces) - n_surfaces = size(edge_information, 2) - intr_count = 1 - for j in 1:n_surfaces - if edge_information[4,j] > 0 - # get the primary/secondary element information and coupling for an interior interface - interfaces.element_ids[1,intr_count] = edge_information[3,j] # primary element id - 
interfaces.element_ids[2,intr_count] = edge_information[4,j] # secondary element id - interfaces.element_side_ids[1,intr_count] = edge_information[5,j] # primary side id - interfaces.element_side_ids[2,intr_count] = abs(edge_information[6,j]) # secondary side id - # default the start and increment indexing - interfaces.start_index[intr_count] = 1 - interfaces.index_increment[intr_count] = 1 - if edge_information[6,j] < 0 - # coordinate system in the secondary element is "flipped" compared to the primary element. - # Adjust the start and increment indexes such that the secondary element coordinate system - # can match the primary neighbour when surface coupling is computed - interfaces.start_index[intr_count] = n_nodes - interfaces.index_increment[intr_count] = -1 - end - intr_count += 1 + n_nodes = nnodes(interfaces) + n_surfaces = size(edge_information, 2) + intr_count = 1 + for j in 1:n_surfaces + if edge_information[4, j] > 0 + # get the primary/secondary element information and coupling for an interior interface + interfaces.element_ids[1, intr_count] = edge_information[3, j] # primary element id + interfaces.element_ids[2, intr_count] = edge_information[4, j] # secondary element id + interfaces.element_side_ids[1, intr_count] = edge_information[5, j] # primary side id + interfaces.element_side_ids[2, intr_count] = abs(edge_information[6, j]) # secondary side id + # default the start and increment indexing + interfaces.start_index[intr_count] = 1 + interfaces.index_increment[intr_count] = 1 + if edge_information[6, j] < 0 + # coordinate system in the secondary element is "flipped" compared to the primary element. + # Adjust the start and increment indexes such that the secondary element coordinate system + # can match the primary neighbour when surface coupling is computed + interfaces.start_index[intr_count] = n_nodes + interfaces.index_increment[intr_count] = -1 + end + intr_count += 1 + end end - end - return nothing + return nothing end - function init_interfaces!(interfaces, edge_information, boundary_names, n_elements, periodic::True) - - n_nodes = nnodes(interfaces) - n_surfaces = size(edge_information, 2) - # for now this set a fully periodic domain - # TODO: possibly adjust to be able to set periodic in only the x or y direction - for j in 1:n_surfaces - if edge_information[4,j] > 0 - # get the primary/secondary element information and coupling for an interior interface - interfaces.element_ids[1,j] = edge_information[3,j] # primary element id - interfaces.element_ids[2,j] = edge_information[4,j] # secondary element id - interfaces.element_side_ids[1,j] = edge_information[5,j] # primary side id - interfaces.element_side_ids[2,j] = abs(edge_information[6,j]) # secondary side id - # default the start and increment indexing - interfaces.start_index[j] = 1 - interfaces.index_increment[j] = 1 - if edge_information[6,j] < 0 - # coordinate system in the secondary element is "flipped" compared to the primary element. 
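When `edge_information[6, j] < 0` the secondary element runs through the shared edge in the opposite direction, which is why `start_index` is set to `n_nodes` and `index_increment` to `-1`. The start/increment pair lets a single surface-coupling loop handle both orientations, e.g.:

n_nodes = 4
for (start, inc) in ((1, 1), (n_nodes, -1))
    # node indices a surface-coupling loop would visit on the secondary side
    indices = [start + (k - 1) * inc for k in 1:n_nodes]
    println(indices)  # [1, 2, 3, 4], then [4, 3, 2, 1]
end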
- # Adjust the start and increment indexes such that the secondary element coordinate system - # can match the primary neighbour when surface coupling is computed - interfaces.start_index[j] = n_nodes - interfaces.index_increment[j] = -1 - end - else - # way to set periodic BCs where we are assuming to have a structured mesh with internal curves - primary_side = edge_information[5,j] - primary_element = edge_information[3,j] - # Note: This is a way to get the neighbour element number and local side from a square - # structured mesh where the element local surface numbering is right-handed - if boundary_names[primary_side, primary_element] === :Bottom - secondary_element = primary_element + (n_elements - convert(Int, sqrt(n_elements))) - secondary_side = 3 - elseif boundary_names[primary_side, primary_element] === :Top - secondary_element = primary_element - (n_elements - convert(Int, sqrt(n_elements))) - secondary_side = 1 - elseif boundary_names[primary_side, primary_element] === :Left - secondary_element = primary_element + (convert(Int, sqrt(n_elements)) - 1) - secondary_side = 2 - elseif boundary_names[primary_side, primary_element] === :Right - secondary_element = primary_element - (convert(Int, sqrt(n_elements)) - 1) - secondary_side = 4 - end - interfaces.element_ids[1,j] = primary_element - interfaces.element_ids[2,j] = secondary_element - interfaces.element_side_ids[1,j] = primary_side - interfaces.element_side_ids[2,j] = secondary_side - # set the start and increment indexing - # Note! We assume that the periodic mesh has no flipped element coordinate systems - interfaces.start_index[j] = 1 - interfaces.index_increment[j] = 1 + n_nodes = nnodes(interfaces) + n_surfaces = size(edge_information, 2) + # for now this sets a fully periodic domain + # TODO: possibly adjust to be able to set periodic in only the x or y direction + for j in 1:n_surfaces + if edge_information[4, j] > 0 + # get the primary/secondary element information and coupling for an interior interface + interfaces.element_ids[1, j] = edge_information[3, j] # primary element id + interfaces.element_ids[2, j] = edge_information[4, j] # secondary element id + interfaces.element_side_ids[1, j] = edge_information[5, j] # primary side id + interfaces.element_side_ids[2, j] = abs(edge_information[6, j]) # secondary side id + # default the start and increment indexing + interfaces.start_index[j] = 1 + interfaces.index_increment[j] = 1 + if edge_information[6, j] < 0 + # coordinate system in the secondary element is "flipped" compared to the primary element.
+ # Adjust the start and increment indexes such that the secondary element coordinate system + # can match the primary neighbour when surface coupling is computed + interfaces.start_index[j] = n_nodes + interfaces.index_increment[j] = -1 + end + else + # way to set periodic BCs where we are assuming to have a structured mesh with internal curves + primary_side = edge_information[5, j] + primary_element = edge_information[3, j] + # Note: This is a way to get the neighbour element number and local side from a square + # structured mesh where the element local surface numbering is right-handed + if boundary_names[primary_side, primary_element] === :Bottom + secondary_element = primary_element + + (n_elements - convert(Int, sqrt(n_elements))) + secondary_side = 3 + elseif boundary_names[primary_side, primary_element] === :Top + secondary_element = primary_element - + (n_elements - convert(Int, sqrt(n_elements))) + secondary_side = 1 + elseif boundary_names[primary_side, primary_element] === :Left + secondary_element = primary_element + + (convert(Int, sqrt(n_elements)) - 1) + secondary_side = 2 + elseif boundary_names[primary_side, primary_element] === :Right + secondary_element = primary_element - + (convert(Int, sqrt(n_elements)) - 1) + secondary_side = 4 + end + interfaces.element_ids[1, j] = primary_element + interfaces.element_ids[2, j] = secondary_element + interfaces.element_side_ids[1, j] = primary_side + interfaces.element_side_ids[2, j] = secondary_side + # set the start and increment indexing + # Note! We assume that the periodic mesh has no flipped element coordinate systems + interfaces.start_index[j] = 1 + interfaces.index_increment[j] = 1 + end end - end - return nothing + return nothing end - # TODO: Clean-up meshes. Find a better name since it's also used for other meshes # generic container for the boundary interfaces of an unstructured mesh -struct UnstructuredBoundaryContainer2D{RealT<:Real, uEltype<:Real} - u ::Array{uEltype, 3} # [variables, i, boundaries] - element_id ::Vector{Int} # [boundaries] - element_side_id ::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 3} # [ndims, nnodes, boundaries] - name ::Vector{Symbol} # [boundaries] +struct UnstructuredBoundaryContainer2D{RealT <: Real, uEltype <: Real} + u::Array{uEltype, 3} # [variables, i, boundaries] + element_id::Vector{Int} # [boundaries] + element_side_id::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 3} # [ndims, nnodes, boundaries] + name::Vector{Symbol} # [boundaries] end - # construct an empty curved boundary container to be filled later with neighbour # information in the unstructured mesh constructor -function UnstructuredBoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - u = fill(nan_uEltype, (n_variables, n_nodes, capacity)) - element_id = fill(typemin(Int), capacity) - element_side_id = fill(typemin(Int), capacity) - node_coordinates = fill(nan_RealT, (2, n_nodes, capacity)) - name = fill(:empty, capacity) - - return UnstructuredBoundaryContainer2D{RealT, uEltype}( - u, element_id, element_side_id, node_coordinates, name) +function UnstructuredBoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: + Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) + + u = fill(nan_uEltype, (n_variables, n_nodes, capacity)) + element_id = fill(typemin(Int), capacity) + 
element_side_id = fill(typemin(Int), capacity) + node_coordinates = fill(nan_RealT, (2, n_nodes, capacity)) + name = fill(:empty, capacity) + + return UnstructuredBoundaryContainer2D{RealT, uEltype}(u, element_id, + element_side_id, + node_coordinates, name) end +@inline function nboundaries(boundaries::UnstructuredBoundaryContainer2D) + length(boundaries.name) +end -@inline nboundaries(boundaries::UnstructuredBoundaryContainer2D) = length(boundaries.name) - - -function init_boundaries(mesh::UnstructuredMesh2D, elements::UnstructuredElementContainer2D) - - boundaries = UnstructuredBoundaryContainer2D{real(elements), eltype(elements)}( - mesh.n_boundaries, nvariables(elements), nnodes(elements)) +function init_boundaries(mesh::UnstructuredMesh2D, + elements::UnstructuredElementContainer2D) + boundaries = UnstructuredBoundaryContainer2D{real(elements), eltype(elements)}(mesh.n_boundaries, + nvariables(elements), + nnodes(elements)) - # extract and save the appropriate boundary information provided any physical boundaries exist - if mesh.n_boundaries > 0 - init_boundaries!(boundaries, mesh.neighbour_information, mesh.boundary_names, elements) - end - return boundaries + # extract and save the appropriate boundary information provided any physical boundaries exist + if mesh.n_boundaries > 0 + init_boundaries!(boundaries, mesh.neighbour_information, mesh.boundary_names, + elements) + end + return boundaries end - function init_boundaries!(boundaries::UnstructuredBoundaryContainer2D, edge_information, boundary_names, elements) - - n_surfaces = size(edge_information,2) - bndy_count = 1 - for j in 1:n_surfaces - if edge_information[4,j] == 0 - # get the primary element information at a boundary interface - primary_element = edge_information[3,j] - primary_side = edge_information[5,j] - boundaries.element_id[bndy_count] = primary_element - boundaries.element_side_id[bndy_count] = primary_side - - # extract the physical boundary's name from the global list - boundaries.name[bndy_count] = boundary_names[primary_side, primary_element] - - # Store copy of the (x,y) node coordinates on the physical boundary - enc = elements.node_coordinates - if primary_side == 1 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, 1, primary_element] - elseif primary_side == 2 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, end, :, primary_element] - elseif primary_side == 3 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, end, primary_element] - else # primary_side == 4 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, 1, :, primary_element] - end - bndy_count += 1 + n_surfaces = size(edge_information, 2) + bndy_count = 1 + for j in 1:n_surfaces + if edge_information[4, j] == 0 + # get the primary element information at a boundary interface + primary_element = edge_information[3, j] + primary_side = edge_information[5, j] + boundaries.element_id[bndy_count] = primary_element + boundaries.element_side_id[bndy_count] = primary_side + + # extract the physical boundary's name from the global list + boundaries.name[bndy_count] = boundary_names[primary_side, primary_element] + + # Store copy of the (x,y) node coordinates on the physical boundary + enc = elements.node_coordinates + if primary_side == 1 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, 1, + primary_element] + elseif primary_side == 2 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, end, :, + primary_element] + elseif primary_side == 3 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, end, + 
primary_element] + else # primary_side == 4 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, 1, :, + primary_element] + end + bndy_count += 1 + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/dg.jl b/src/solvers/dgsem_unstructured/dg.jl index 36926e6463a..3543f1a5829 100644 --- a/src/solvers/dgsem_unstructured/dg.jl +++ b/src/solvers/dgsem_unstructured/dg.jl @@ -3,19 +3,19 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent - -@inline function get_one_sided_surface_node_vars(u, equations, solver::DG, j, indices...) - # There is a cut-off at `n == 10` inside of the method - # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 - # in Julia `v1.5`, leading to type instabilities if - # more than ten variables are used. That's why we use - # `Val(...)` below. - u_surface = SVector(ntuple(v -> u[j, v, indices...], Val(nvariables(equations)))) - return u_surface +@inline function get_one_sided_surface_node_vars(u, equations, solver::DG, j, + indices...) + # There is a cut-off at `n == 10` inside of the method + # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 + # in Julia `v1.5`, leading to type instabilities if + # more than ten variables are used. That's why we use + # `Val(...)` below. + u_surface = SVector(ntuple(v -> u[j, v, indices...], Val(nvariables(equations)))) + return u_surface end - # 2D unstructured DG implementation include("mappings_geometry_curved_2d.jl") include("mappings_geometry_straight_2d.jl") @@ -23,5 +23,4 @@ include("containers_2d.jl") include("sort_boundary_conditions.jl") include("dg_2d.jl") include("indicators_2d.jl") - end # @muladd diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index 283f8bdc74e..95dec027a82 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -3,82 +3,90 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. 
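As a minimal standalone sketch (not Trixi code; the field names below are invented) of the `cache` plumbing used in `create_cache` that follows: the cache is a plain Julia NamedTuple, and the splatting idiom `(; cache..., extras...)` merges two NamedTuples field by field, which is how the volume-integral-specific storage is appended to the base cache.

base = (; elements = :elems, interfaces = :ifaces, boundaries = :bndys)  # stand-ins for the real containers
extra = (; scratch = :threaded_buffers)  # hypothetical volume-integral storage
merged = (; base..., extra...)  # merge by splatting; later names win on a name clash
@assert keys(merged) == (:elements, :interfaces, :boundaries, :scratch)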
function create_cache(mesh::UnstructuredMesh2D, equations, dg::DG, RealT, uEltype) + elements = init_elements(mesh, equations, dg.basis, RealT, uEltype) - elements = init_elements(mesh, equations, dg.basis, RealT, uEltype) - - interfaces = init_interfaces(mesh, elements) + interfaces = init_interfaces(mesh, elements) - boundaries = init_boundaries(mesh, elements) + boundaries = init_boundaries(mesh, elements) - cache = (; elements, interfaces, boundaries) + cache = (; elements, interfaces, boundaries) - # perform a check on the sufficient metric identities condition for free-stream preservation - # and halt computation if it fails - if !isapprox(max_discrete_metric_identities(dg, cache), 0, atol=1e-12) - error("metric terms fail free-stream preservation check with maximum error $(max_discrete_metric_identities(dg, cache))") - end + # perform a check on the sufficient metric identities condition for free-stream preservation + # and halt computation if it fails + if !isapprox(max_discrete_metric_identities(dg, cache), 0, atol = 1e-12) + error("metric terms fail free-stream preservation check with maximum error $(max_discrete_metric_identities(dg, cache))") + end - # Add specialized parts of the cache required to compute the flux differencing volume integral - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + # Add specialized parts of the cache required to compute the flux differencing volume integral + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - return cache + return cache end - function rhs!(du, u, t, mesh::UnstructuredMesh2D, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - # Note! 
this routine is reused from dg_curved/dg_2d.jl - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + # Note! this routine is reused from dg_curved/dg_2d.jl + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end # prolong the solution into the convenience array in the interior interface container # We pass the `surface_integral` argument solely for dispatch @@ -86,107 +94,110 @@ end function prolong2interfaces!(cache, u, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DG) - @unpack interfaces = cache - - @threaded for interface in eachinterface(dg, cache) - primary_element = interfaces.element_ids[1, interface] - secondary_element = interfaces.element_ids[2, interface] - - primary_side = interfaces.element_side_ids[1, interface] - secondary_side = interfaces.element_side_ids[2, interface] - - if primary_side == 1 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i, 1, primary_element] - end - elseif primary_side == 2 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, nnodes(dg), i, primary_element] - end - elseif primary_side == 3 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), primary_element] - end - else # primary_side == 4 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, 1, i, primary_element] - end - end - - if secondary_side == 1 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i, 1, secondary_element] - end - elseif secondary_side == 2 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, nnodes(dg), i, secondary_element] - end - elseif secondary_side == 3 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i, nnodes(dg), 
secondary_element] - end - else # secondary_side == 4 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, 1, i, secondary_element] - end + @unpack interfaces = cache + + @threaded for interface in eachinterface(dg, cache) + primary_element = interfaces.element_ids[1, interface] + secondary_element = interfaces.element_ids[2, interface] + + primary_side = interfaces.element_side_ids[1, interface] + secondary_side = interfaces.element_side_ids[2, interface] + + if primary_side == 1 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i, 1, primary_element] + end + elseif primary_side == 2 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, nnodes(dg), i, primary_element] + end + elseif primary_side == 3 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), primary_element] + end + else # primary_side == 4 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, 1, i, primary_element] + end + end + + if secondary_side == 1 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, i, 1, secondary_element] + end + elseif secondary_side == 2 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, nnodes(dg), i, + secondary_element] + end + elseif secondary_side == 3 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, i, nnodes(dg), + secondary_element] + end + else # secondary_side == 4 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, 1, i, secondary_element] + end + end end - end - return nothing + return nothing end - # compute the numerical flux interface coupling between two elements on an unstructured # quadrilateral mesh function calc_interface_flux!(surface_flux_values, mesh::UnstructuredMesh2D, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces - @unpack normal_directions = cache.elements - - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - primary_element = element_ids[1, interface] - secondary_element = element_ids[2, interface] - - # Get the local side id on which to compute the flux - primary_side = element_side_ids[1, interface] - secondary_side = element_side_ids[2, interface] - - # initial index for the coordinate system on the secondary element - secondary_index = start_index[interface] - - # loop through the primary element coordinate system and compute the interface coupling - for primary_index in eachnode(dg) - # pull the primary and secondary states from the boundary u values - u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, interface) - u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, interface) - - # pull the outward pointing (normal) directional vector - # Note! this assumes a conforming approximation, more must be done in terms of the normals - # for hanging nodes and other non-conforming approximation spaces - outward_direction = get_surface_normal(normal_directions, primary_index, primary_side, - primary_element) - - # Call pointwise numerical flux with rotation. 
Direction is normalized inside this function - flux = surface_flux(u_ll, u_rr, outward_direction, equations) - - # Copy flux back to primary/secondary element storage - # Note the sign change for the normal flux in the secondary element! - for v in eachvariable(equations) - surface_flux_values[v, primary_index , primary_side , primary_element ] = flux[v] - surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -flux[v] - end - - # increment the index of the coordinate system in the secondary element - secondary_index += index_increment[interface] + @unpack surface_flux = surface_integral + @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces + @unpack normal_directions = cache.elements + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + primary_element = element_ids[1, interface] + secondary_element = element_ids[2, interface] + + # Get the local side id on which to compute the flux + primary_side = element_side_ids[1, interface] + secondary_side = element_side_ids[2, interface] + + # initial index for the coordinate system on the secondary element + secondary_index = start_index[interface] + + # loop through the primary element coordinate system and compute the interface coupling + for primary_index in eachnode(dg) + # pull the primary and secondary states from the boundary u values + u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, + interface) + u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, + interface) + + # pull the outward pointing (normal) directional vector + # Note! this assumes a conforming approximation, more must be done in terms of the normals + # for hanging nodes and other non-conforming approximation spaces + outward_direction = get_surface_normal(normal_directions, primary_index, + primary_side, + primary_element) + + # Call pointwise numerical flux with rotation. Direction is normalized inside this function + flux = surface_flux(u_ll, u_rr, outward_direction, equations) + + # Copy flux back to primary/secondary element storage + # Note the sign change for the normal flux in the secondary element! 
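# (Aside, a sketch rather than part of the surrounding diff: why `-flux[v]` below is
# correct. At a conforming interface the secondary element's outward normal is the
# negative of the primary one, and a conservative two-point numerical flux satisfies
#     f*(u_ll, u_rr, n) == -f*(u_rr, u_ll, -n),
# so a single flux evaluation in the primary outward direction serves both elements
# and the coupling remains conservative.)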
+ for v in eachvariable(equations) + surface_flux_values[v, primary_index, primary_side, primary_element] = flux[v] + surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -flux[v] + end + + # increment the index of the coordinate system in the secondary element + secondary_index += index_increment[interface] + end end - end - return nothing + return nothing end # compute the numerical flux interface with nonconservative terms coupling between two elements @@ -195,204 +206,207 @@ function calc_interface_flux!(surface_flux_values, mesh::UnstructuredMesh2D, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces - @unpack normal_directions = cache.elements - - @threaded for interface in eachinterface(dg, cache) - # Get the primary element index and local side index - primary_element = element_ids[1, interface] - primary_side = element_side_ids[1, interface] - - # Get neighboring element, local side index, and index increment on the - # secondary element - secondary_element = element_ids[2, interface] - secondary_side = element_side_ids[2, interface] - secondary_index_increment = index_increment[interface] - - secondary_index = start_index[interface] - for primary_index in eachnode(dg) - # pull the primary and secondary states from the boundary u values - u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, interface) - u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, interface) - - # pull the outward pointing (normal) directional vector - # Note! This assumes a conforming approximation, more must be done in terms - # of the normals for hanging nodes and other non-conforming approximation spaces - outward_direction = get_surface_normal(normal_directions, primary_index, primary_side, - primary_element) - - # Calculate the conservative portion of the numerical flux - # Call pointwise numerical flux with rotation. Direction is normalized - # inside this function - flux = surface_flux(u_ll, u_rr, outward_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `outward_direction` twice. - noncons_primary = nonconservative_flux(u_ll, u_rr, outward_direction, outward_direction, equations) - noncons_secondary = nonconservative_flux(u_rr, u_ll, outward_direction, outward_direction, equations) - - # Copy flux to primary and secondary element storage - # Note the sign change for the components in the secondary element! 
- for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, primary_index, primary_side, primary_element] = ( - flux[v] + 0.5 * noncons_primary[v]) - surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -( - flux[v] + 0.5 * noncons_secondary[v]) - end - - # increment the index of the coordinate system in the secondary element - secondary_index += secondary_index_increment + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces + @unpack normal_directions = cache.elements + + @threaded for interface in eachinterface(dg, cache) + # Get the primary element index and local side index + primary_element = element_ids[1, interface] + primary_side = element_side_ids[1, interface] + + # Get neighboring element, local side index, and index increment on the + # secondary element + secondary_element = element_ids[2, interface] + secondary_side = element_side_ids[2, interface] + secondary_index_increment = index_increment[interface] + + secondary_index = start_index[interface] + for primary_index in eachnode(dg) + # pull the primary and secondary states from the boundary u values + u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, + interface) + u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, + interface) + + # pull the outward pointing (normal) directional vector + # Note! This assumes a conforming approximation, more must be done in terms + # of the normals for hanging nodes and other non-conforming approximation spaces + outward_direction = get_surface_normal(normal_directions, primary_index, + primary_side, + primary_element) + + # Calculate the conservative portion of the numerical flux + # Call pointwise numerical flux with rotation. Direction is normalized + # inside this function + flux = surface_flux(u_ll, u_rr, outward_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `outward_direction` twice. + noncons_primary = nonconservative_flux(u_ll, u_rr, outward_direction, + outward_direction, equations) + noncons_secondary = nonconservative_flux(u_rr, u_ll, outward_direction, + outward_direction, equations) + + # Copy flux to primary and secondary element storage + # Note the sign change for the components in the secondary element! 
+ for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, primary_index, primary_side, primary_element] = (flux[v] + + 0.5 * + noncons_primary[v]) + surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -(flux[v] + + 0.5 * + noncons_secondary[v]) + end + + # increment the index of the coordinate system in the secondary element + secondary_index += secondary_index_increment + end end - end - return nothing + return nothing end - # move the approximate solution onto physical boundaries within a "right-handed" element function prolong2boundaries!(cache, u, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DG) - @unpack boundaries = cache - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.element_id[boundary] - side = boundaries.element_side_id[boundary] - - if side == 1 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, l, 1, element] - end - elseif side == 2 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, nnodes(dg), l, element] - end - elseif side == 3 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, l, nnodes(dg), element] - end - else # side == 4 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, 1, l, element] - end + @unpack boundaries = cache + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.element_id[boundary] + side = boundaries.element_side_id[boundary] + + if side == 1 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, l, 1, element] + end + elseif side == 2 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, nnodes(dg), l, element] + end + elseif side == 3 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, l, nnodes(dg), element] + end + else # side == 4 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, 1, l, element] + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert isempty(eachboundary(dg, cache)) end - # Function barrier for type stability function calc_boundary_flux!(cache, t, boundary_conditions, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) - @unpack boundary_condition_types, boundary_indices = boundary_conditions + @unpack boundary_condition_types, boundary_indices = boundary_conditions - calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices, - mesh, equations, surface_integral, dg) - return nothing + calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices, + mesh, equations, surface_integral, dg) + return nothing end - # Iterate over tuples of boundary condition types and associated indices # in a type-stable way using "lispy tuple programming". 
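The recursion below can be seen in isolation in this minimal sketch (toy names, not Trixi code): the heterogeneous tuple is peeled one element at a time, so each method instance works with a concrete element type, and a method on the empty tuple terminates the recursion.

# handle the first element with its concrete type, then recurse on the rest
function process_all!(out, items::NTuple{N, Any}) where {N}
    push!(out, string(first(items)))
    process_all!(out, Base.tail(items))
    return nothing
end
# Tuple{} is more specific than NTuple{N, Any}, so dispatch ends the recursion here
process_all!(out, items::Tuple{}) = nothing

out = String[]
process_all!(out, (sin, 1, "two"))  # each recursive call is fully type-inferred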
-function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N,Any}, - BC_indices::NTuple{N,Vector{Int}}, +function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any}, + BC_indices::NTuple{N, Vector{Int}}, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) where {N} - # Extract the boundary condition type and index vector - boundary_condition = first(BCs) - boundary_condition_indices = first(BC_indices) - # Extract the remaining types and indices to be processed later - remaining_boundary_conditions = Base.tail(BCs) - remaining_boundary_condition_indices = Base.tail(BC_indices) - - # process the first boundary condition type - calc_boundary_flux!(cache, t, boundary_condition, boundary_condition_indices, - mesh, equations, surface_integral, dg) - - # recursively call this method with the unprocessed boundary types - calc_boundary_flux_by_type!(cache, t, remaining_boundary_conditions, - remaining_boundary_condition_indices, - mesh, equations, surface_integral, dg) - - return nothing + # Extract the boundary condition type and index vector + boundary_condition = first(BCs) + boundary_condition_indices = first(BC_indices) + # Extract the remaining types and indices to be processed later + remaining_boundary_conditions = Base.tail(BCs) + remaining_boundary_condition_indices = Base.tail(BC_indices) + + # process the first boundary condition type + calc_boundary_flux!(cache, t, boundary_condition, boundary_condition_indices, + mesh, equations, surface_integral, dg) + + # recursively call this method with the unprocessed boundary types + calc_boundary_flux_by_type!(cache, t, remaining_boundary_conditions, + remaining_boundary_condition_indices, + mesh, equations, surface_integral, dg) + + return nothing end # terminate the type-stable iteration over tuples function calc_boundary_flux_by_type!(cache, t, BCs::Tuple{}, BC_indices::Tuple{}, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) - nothing + nothing end - function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack element_id, element_side_id = cache.boundaries - - @threaded for local_index in eachindex(boundary_indexing) - # use the local index to get the global boundary index from the pre-sorted list - boundary = boundary_indexing[local_index] - - # get the element and side IDs on the boundary element - element = element_id[boundary] - side = element_side_id[boundary] - - # calc boundary flux on the current boundary interface - for node in eachnode(dg) - calc_boundary_flux!(surface_flux_values, t, boundary_condition, - mesh, have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - node, side, element, boundary) + @unpack surface_flux_values = cache.elements + @unpack element_id, element_side_id = cache.boundaries + + @threaded for local_index in eachindex(boundary_indexing) + # use the local index to get the global boundary index from the pre-sorted list + boundary = boundary_indexing[local_index] + + # get the element and side IDs on the boundary element + element = element_id[boundary] + side = element_side_id[boundary] + + # calc boundary flux on the current boundary interface + for node in eachnode(dg) + calc_boundary_flux!(surface_flux_values, t, boundary_condition, + mesh, have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + node, side, element, boundary) + end end - end end - # 
inlined version of the boundary flux calculation along a physical interface where the # boundary flux values are set according to a particular `boundary_condition` function @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::UnstructuredMesh2D, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, - node_index, side_index, element_index, boundary_index) - @unpack normal_directions = cache.elements - @unpack u, node_coordinates = cache.boundaries - @unpack surface_flux = surface_integral + node_index, side_index, element_index, + boundary_index) + @unpack normal_directions = cache.elements + @unpack u, node_coordinates = cache.boundaries + @unpack surface_flux = surface_integral - # pull the inner solution state from the boundary u values on the boundary element - u_inner = get_node_vars(u, equations, dg, node_index, boundary_index) + # pull the inner solution state from the boundary u values on the boundary element + u_inner = get_node_vars(u, equations, dg, node_index, boundary_index) - # pull the outward pointing (normal) directional vector - outward_direction = get_surface_normal(normal_directions, node_index, side_index, element_index) + # pull the outward pointing (normal) directional vector + outward_direction = get_surface_normal(normal_directions, node_index, side_index, + element_index) - # get the external solution values from the prescribed external state - x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index) + # get the external solution values from the prescribed external state + x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index) - # Call pointwise numerical flux function in the normal direction on the boundary - flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations) + # Call pointwise numerical flux function in the normal direction on the boundary + flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations) - for v in eachvariable(equations) - surface_flux_values[v, node_index, side_index, element_index] = flux[v] - end + for v in eachvariable(equations) + surface_flux_values[v, node_index, side_index, element_index] = flux[v] + end end # inlined version of the boundary flux and nonconservative terms calculation along a @@ -405,41 +419,45 @@ end mesh::UnstructuredMesh2D, nonconservative_terms::True, equations, surface_integral, dg::DG, cache, - node_index, side_index, element_index, boundary_index) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack normal_directions = cache.elements - @unpack u, node_coordinates = cache.boundaries - - # pull the inner solution state from the boundary u values on the boundary element - u_inner = get_node_vars(u, equations, dg, node_index, boundary_index) - - # pull the outward pointing (normal) directional vector - outward_direction = get_surface_normal(normal_directions, node_index, side_index, element_index) - - # get the external solution values from the prescribed external state - x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index) - - # Call pointwise numerical flux function for the conservative part - # in the normal direction on the boundary - flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations) - - # Compute pointwise nonconservative numerical flux at the boundary.
- # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `outward_direction` twice. - # Note: This does not set any type of boundary condition for the nonconservative term - noncons_flux = nonconservative_flux(u_inner, u_inner, outward_direction, outward_direction, equations) - - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, node_index, side_index, element_index] = flux[v] + 0.5 * noncons_flux[v] - end + node_index, side_index, element_index, + boundary_index) + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack normal_directions = cache.elements + @unpack u, node_coordinates = cache.boundaries + + # pull the inner solution state from the boundary u values on the boundary element + u_inner = get_node_vars(u, equations, dg, node_index, boundary_index) + + # pull the outward pointing (normal) directional vector + outward_direction = get_surface_normal(normal_directions, node_index, side_index, + element_index) + + # get the external solution values from the prescribed external state + x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index) + + # Call pointwise numerical flux function for the conservative part + # in the normal direction on the boundary + flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations) + + # Compute pointwise nonconservative numerical flux at the boundary. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `outward_direction` twice. + # Note: This does not set any type of boundary condition for the nonconservative term + noncons_flux = nonconservative_flux(u_inner, u_inner, outward_direction, + outward_direction, equations) + + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, node_index, side_index, element_index] = flux[v] + + 0.5 * + noncons_flux[v] + end end - # Note! The local side numbering for the unstructured quadrilateral element implementation differs # from the structured TreeMesh or StructuredMesh local side numbering: # @@ -456,55 +474,58 @@ end # Therefore, we require a different surface integral routine here despite their similar structure. 
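As a compact reference (a hedged summary consistent with `prolong2boundaries!` above; the helper name is invented): the unstructured local sides run counter-clockwise starting at the bottom, and each side touches the following volume node indices.

# map a local side id to the (i, j) volume indices of its l-th surface node
# on a tensor-product element with n nodes per direction
side_node_indices(side, l, n) = side == 1 ? (l, 1) :  # bottom
                                side == 2 ? (n, l) :  # right
                                side == 3 ? (l, n) :  # top
                                (1, l)  # left, side == 4

@assert side_node_indices(2, 3, 4) == (4, 3)  # third node along the right side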
function calc_surface_integral!(du, u, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg), v in eachvariable(equations) - # surface contribution along local sides 2 and 4 (fixed x and y varies) - du[v, 1, l, element] += ( surface_flux_values[v, l, 4, element] - * boundary_interpolation[1, 1] ) - du[v, nnodes(dg), l, element] += ( surface_flux_values[v, l, 2, element] - * boundary_interpolation[nnodes(dg), 2] ) - # surface contribution along local sides 1 and 3 (fixed y and x varies) - du[v, l, 1, element] += ( surface_flux_values[v, l, 1, element] - * boundary_interpolation[1, 1] ) - du[v, l, nnodes(dg), element] += ( surface_flux_values[v, l, 3, element] - * boundary_interpolation[nnodes(dg), 2] ) + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg), v in eachvariable(equations) + # surface contribution along local sides 2 and 4 (fixed x and y varies) + du[v, 1, l, element] += (surface_flux_values[v, l, 4, element] + * + boundary_interpolation[1, 1]) + du[v, nnodes(dg), l, element] += (surface_flux_values[v, l, 2, element] + * + boundary_interpolation[nnodes(dg), 2]) + # surface contribution along local sides 1 and 3 (fixed y and x varies) + du[v, l, 1, element] += (surface_flux_values[v, l, 1, element] + * + boundary_interpolation[1, 1]) + du[v, l, nnodes(dg), element] += (surface_flux_values[v, l, 3, element] + * + boundary_interpolation[nnodes(dg), 2]) + end end - end - return nothing + return nothing end - # This routine computes the maximum value of the discrete metric identities necessary to ensure # that the approximation will be free-stream preserving (i.e. a constant solution remains constant) # on a curvilinear mesh. # Note! Independent of the equation system and is only a check on the discrete mapping terms.
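For context on the routine below (a hedged sketch of the identity being checked, written in the notation of the code rather than of any particular reference): with the contravariant vectors Ja^1 and Ja^2 stored in `contravariant_vectors` and the nodal derivative matrix D from `dg.basis`, free-stream preservation requires the discrete metric identities to vanish,

\max_{\text{elements},\; i \in \{1, 2\}} \; \max_{j, k} \left| \left( D \, (J a^1_i) + (J a^2_i) \, D^T \right)_{j k} \right| \approx 0,

and `max_discrete_metric_identities` returns exactly this maximum absolute residual, which `create_cache` then compares against an absolute tolerance of 1e-12.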
# Can be used for a metric identities check on StructuredMesh{2} or UnstructuredMesh2D function max_discrete_metric_identities(dg::DGSEM, cache) - @unpack derivative_matrix = dg.basis - @unpack contravariant_vectors = cache.elements + @unpack derivative_matrix = dg.basis + @unpack contravariant_vectors = cache.elements - ndims_ = size(contravariant_vectors, 1) + ndims_ = size(contravariant_vectors, 1) - metric_id_dx = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) - metric_id_dy = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) + metric_id_dx = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) + metric_id_dy = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) - max_metric_ids = zero(eltype(contravariant_vectors)) + max_metric_ids = zero(eltype(contravariant_vectors)) - for i in 1:ndims_, element in eachelement(dg, cache) - # compute D*Ja_1^i + Ja_2^i*D^T - @views mul!(metric_id_dx, derivative_matrix, contravariant_vectors[i, 1, :, :, element]) - @views mul!(metric_id_dy, contravariant_vectors[i, 2, :, :, element], derivative_matrix') - local_max_metric_ids = maximum( abs.(metric_id_dx + metric_id_dy) ) + for i in 1:ndims_, element in eachelement(dg, cache) + # compute D*Ja_1^i + Ja_2^i*D^T + @views mul!(metric_id_dx, derivative_matrix, + contravariant_vectors[i, 1, :, :, element]) + @views mul!(metric_id_dy, contravariant_vectors[i, 2, :, :, element], + derivative_matrix') + local_max_metric_ids = maximum(abs.(metric_id_dx + metric_id_dy)) - max_metric_ids = max(max_metric_ids, local_max_metric_ids) - end + max_metric_ids = max(max_metric_ids, local_max_metric_ids) + end - return max_metric_ids + return max_metric_ids end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/indicators_2d.jl b/src/solvers/dgsem_unstructured/indicators_2d.jl index fe841e78687..8052534ad48 100644 --- a/src/solvers/dgsem_unstructured/indicators_2d.jl +++ b/src/solvers/dgsem_unstructured/indicators_2d.jl @@ -3,22 +3,22 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! 
format: noindent function apply_smoothing!(mesh::UnstructuredMesh2D, alpha, alpha_tmp, dg, cache) - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha - - # Loop over interfaces - for interface in eachinterface(dg, cache) - # Get neighboring element ids - left = cache.interfaces.element_ids[1, interface] - right = cache.interfaces.element_ids[2, interface] + # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha + # Copy alpha values such that smoothing is independent of the element access order + alpha_tmp .= alpha - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) - alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) - end -end + # Loop over interfaces + for interface in eachinterface(dg, cache) + # Get neighboring element ids + left = cache.interfaces.element_ids[1, interface] + right = cache.interfaces.element_ids[2, interface] + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) + alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) + end +end end # @muladd diff --git a/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl b/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl index 8ad018bd08a..75b9a1f4da2 100644 --- a/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl +++ b/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl @@ -3,148 +3,161 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # transfinite mapping formula from a point (xi, eta) in reference space [-1,1]^2 to a point # (x,y) in physical coordinate space for a quadrilateral element with general curved sides # Alg.
98 from the blue book of Kopriva function transfinite_quad_map(xi, eta, surface_curves::AbstractVector{<:CurvedSurface}) - # evaluate the gamma curves to get the four corner points of the element - x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1]) - x_corner2, y_corner2 = evaluate_at( 1.0, surface_curves[1]) - x_corner3, y_corner3 = evaluate_at( 1.0, surface_curves[3]) - x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3]) - - # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get - # the value (x,y) in physical space - x1, y1 = evaluate_at(xi , surface_curves[1]) - x2, y2 = evaluate_at(eta, surface_curves[2]) - x3, y3 = evaluate_at(xi , surface_curves[3]) - x4, y4 = evaluate_at(eta, surface_curves[4]) - - x = ( 0.5 * ( (1.0 - xi) * x4 + (1.0 + xi) * x2 + (1.0 - eta) * x1 + (1.0 + eta) * x3 ) - - 0.25 * ( (1.0 - xi) * ( (1.0 - eta) * x_corner1 + (1.0 + eta) * x_corner4 ) - + (1.0 + xi) * ( (1.0 - eta) * x_corner2 + (1.0 + eta) * x_corner3 ) ) ) - - y = ( 0.5 * ( (1.0 - xi) * y4 + (1.0 + xi) * y2 + (1.0 - eta) * y1 + (1.0 + eta) * y3 ) - - 0.25 * ( (1.0 - xi) * ( (1.0 - eta) * y_corner1 + (1.0 + eta) * y_corner4 ) - + (1.0 + xi) * ( (1.0 - eta) * y_corner2 + (1.0 + eta) * y_corner3 ) ) ) - - return x, y + # evaluate the gamma curves to get the four corner points of the element + x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1]) + x_corner2, y_corner2 = evaluate_at(1.0, surface_curves[1]) + x_corner3, y_corner3 = evaluate_at(1.0, surface_curves[3]) + x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3]) + + # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get + # the value (x,y) in physical space + x1, y1 = evaluate_at(xi, surface_curves[1]) + x2, y2 = evaluate_at(eta, surface_curves[2]) + x3, y3 = evaluate_at(xi, surface_curves[3]) + x4, y4 = evaluate_at(eta, surface_curves[4]) + + x = (0.5 * + ((1.0 - xi) * x4 + (1.0 + xi) * x2 + (1.0 - eta) * x1 + (1.0 + eta) * x3) + - + 0.25 * ((1.0 - xi) * ((1.0 - eta) * x_corner1 + (1.0 + eta) * x_corner4) + + (1.0 + xi) * ((1.0 - eta) * x_corner2 + (1.0 + eta) * x_corner3))) + + y = (0.5 * + ((1.0 - xi) * y4 + (1.0 + xi) * y2 + (1.0 - eta) * y1 + (1.0 + eta) * y3) + - + 0.25 * ((1.0 - xi) * ((1.0 - eta) * y_corner1 + (1.0 + eta) * y_corner4) + + (1.0 + xi) * ((1.0 - eta) * y_corner2 + (1.0 + eta) * y_corner3))) + + return x, y end - # Compute the metric terms for the general curved sided quadrilateral transfinite mapping # Alg.
99 from the blue book of Kopriva -function transfinite_quad_map_metrics(xi, eta, surface_curves::AbstractVector{<:CurvedSurface}) - - # evaluate the gamma curves to get the four corner points of the element - x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1]) - x_corner2, y_corner2 = evaluate_at( 1.0, surface_curves[1]) - x_corner3, y_corner3 = evaluate_at( 1.0, surface_curves[3]) - x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3]) - - # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get - # the value (x,y) in physical space - x1, y1 = evaluate_at(xi , surface_curves[1]) - x2, y2 = evaluate_at(eta, surface_curves[2]) - x3, y3 = evaluate_at(xi , surface_curves[3]) - x4, y4 = evaluate_at(eta, surface_curves[4]) - - # evaluate along the derivative of the gamma curves at a particular point (ξ, η) in - # computational space to get the value (x_prime,y_prime) in physical space - x1_prime, y1_prime = derivative_at(xi , surface_curves[1]) - x2_prime, y2_prime = derivative_at(eta, surface_curves[2]) - x3_prime, y3_prime = derivative_at(xi , surface_curves[3]) - x4_prime, y4_prime = derivative_at(eta, surface_curves[4]) - - X_xi = ( 0.5 * (x2 - x4 + (1.0 - eta) * x1_prime + (1.0 + eta) * x3_prime) - -0.25 * ((1.0 - eta) * (x_corner2 - x_corner1) + (1.0 + eta) * (x_corner3 - x_corner4)) ) - - X_eta = ( 0.5 * ((1.0 - xi) * x4_prime + (1.0 + xi) * x2_prime + x3 - x1) - -0.25 * ((1.0 - xi) * (x_corner4 - x_corner1) + (1.0 + xi) * (x_corner3 - x_corner2)) ) - - Y_xi = ( 0.5 * (y2 - y4 + (1.0 - eta) * y1_prime + (1.0 + eta) * y3_prime) - -0.25 * ((1.0 - eta) * (y_corner2 - y_corner1) + (1.0 + eta) * (y_corner3 - y_corner4)) ) - - Y_eta = ( 0.5 * ((1.0 - xi) * y4_prime + (1.0 + xi) * y2_prime + y3 - y1) - -0.25 * ((1.0 - xi) * (y_corner4 - y_corner1) + (1.0 + xi) * (y_corner3 - y_corner2)) ) - - return X_xi, X_eta, Y_xi, Y_eta +function transfinite_quad_map_metrics(xi, eta, + surface_curves::AbstractVector{<:CurvedSurface}) + + # evaluate the gamma curves to get the four corner points of the element + x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1]) + x_corner2, y_corner2 = evaluate_at(1.0, surface_curves[1]) + x_corner3, y_corner3 = evaluate_at(1.0, surface_curves[3]) + x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3]) + + # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get + # the value (x,y) in physical space + x1, y1 = evaluate_at(xi, surface_curves[1]) + x2, y2 = evaluate_at(eta, surface_curves[2]) + x3, y3 = evaluate_at(xi, surface_curves[3]) + x4, y4 = evaluate_at(eta, surface_curves[4]) + + # evaluate along the derivative of the gamma curves at a particular point (ξ, η) in + # computational space to get the value (x_prime,y_prime) in physical space + x1_prime, y1_prime = derivative_at(xi, surface_curves[1]) + x2_prime, y2_prime = derivative_at(eta, surface_curves[2]) + x3_prime, y3_prime = derivative_at(xi, surface_curves[3]) + x4_prime, y4_prime = derivative_at(eta, surface_curves[4]) + + X_xi = (0.5 * (x2 - x4 + (1.0 - eta) * x1_prime + (1.0 + eta) * x3_prime) + - + 0.25 * ((1.0 - eta) * (x_corner2 - x_corner1) + + (1.0 + eta) * (x_corner3 - x_corner4))) + + X_eta = (0.5 * ((1.0 - xi) * x4_prime + (1.0 + xi) * x2_prime + x3 - x1) + - + 0.25 * ((1.0 - xi) * (x_corner4 - x_corner1) + + (1.0 + xi) * (x_corner3 - x_corner2))) + + Y_xi = (0.5 * (y2 - y4 + (1.0 - eta) * y1_prime + (1.0 + eta) * y3_prime) + - + 0.25 * ((1.0 - eta) * (y_corner2 - y_corner1) + + (1.0 + eta) * (y_corner3 
- y_corner4))) + + Y_eta = (0.5 * ((1.0 - xi) * y4_prime + (1.0 + xi) * y2_prime + y3 - y1) + - + 0.25 * ((1.0 - xi) * (y_corner4 - y_corner1) + + (1.0 + xi) * (y_corner3 - y_corner2))) + + return X_xi, X_eta, Y_xi, Y_eta end - # construct the (x,y) node coordinates in the volume of a curved sided element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, nodes, +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, + nodes, surface_curves::AbstractVector{<:CurvedSurface}) + for j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, element] .= transfinite_quad_map(nodes[i], nodes[j], + surface_curves) + end - for j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i, j, element] .= transfinite_quad_map(nodes[i], nodes[j], surface_curves) - end - - return node_coordinates + return node_coordinates end - # construct the metric terms for a curved sided element function calc_metric_terms!(jacobian_matrix, element, nodes, surface_curves::AbstractVector{<:CurvedSurface}) - # storage format: - # jacobian_matrix[1,1,:,:,:] <- X_xi - # jacobian_matrix[1,2,:,:,:] <- X_eta - # jacobian_matrix[2,1,:,:,:] <- Y_xi - # jacobian_matrix[2,2,:,:,:] <- Y_eta - for j in eachindex(nodes), i in eachindex(nodes) - (jacobian_matrix[1, 1, i, j, element], - jacobian_matrix[1, 2, i, j, element], - jacobian_matrix[2, 1, i, j, element], - jacobian_matrix[2, 2, i, j, element]) = transfinite_quad_map_metrics(nodes[i], nodes[j], - surface_curves) - end - - return jacobian_matrix + # storage format: + # jacobian_matrix[1,1,:,:,:] <- X_xi + # jacobian_matrix[1,2,:,:,:] <- X_eta + # jacobian_matrix[2,1,:,:,:] <- Y_xi + # jacobian_matrix[2,2,:,:,:] <- Y_eta + for j in eachindex(nodes), i in eachindex(nodes) + (jacobian_matrix[1, 1, i, j, element], + jacobian_matrix[1, 2, i, j, element], + jacobian_matrix[2, 1, i, j, element], + jacobian_matrix[2, 2, i, j, element]) = transfinite_quad_map_metrics(nodes[i], + nodes[j], + surface_curves) + end + + return jacobian_matrix end - # construct the normal direction vectors (but not actually normalized) for a curved sided element # normalization occurs on the fly during the surface flux computation function calc_normal_directions!(normal_directions, element, nodes, surface_curves::AbstractVector{<:CurvedSurface}) - # normal directions on the boundary for the left (local side 4) and right (local side 2) - for j in eachindex(nodes) - # side 2 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(1.0, nodes[j], surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, j, 2, element] = sign(Jtemp) * ( Y_eta ) - normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta ) - - # side 4 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(-1.0, nodes[j], surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, j, 4, element] = -sign(Jtemp) * ( Y_eta ) - normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta ) - end - - # normal directions on the boundary for the top (local side 3) and bottom (local side 1) - for i in eachindex(nodes) - # side 1 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], -1.0, surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi ) - normal_directions[2, i, 1, element] = -sign(Jtemp) * ( X_xi ) - - # side 3 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], 1.0, surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - 
normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi ) - normal_directions[2, i, 3, element] = sign(Jtemp) * ( X_xi ) - end - - return normal_directions + # normal directions on the boundary for the left (local side 4) and right (local side 2) + for j in eachindex(nodes) + # side 2 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(1.0, nodes[j], + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, j, 2, element] = sign(Jtemp) * (Y_eta) + normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta) + + # side 4 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(-1.0, nodes[j], + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, j, 4, element] = -sign(Jtemp) * (Y_eta) + normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta) + end + + # normal directions on the boundary for the top (local side 3) and bottom (local side 1) + for i in eachindex(nodes) + # side 1 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], -1.0, + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi) + normal_directions[2, i, 1, element] = -sign(Jtemp) * (X_xi) + + # side 3 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], 1.0, + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi) + normal_directions[2, i, 3, element] = sign(Jtemp) * (X_xi) + end + + return normal_directions end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl b/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl index d4f9cf5d49d..7ceba93188d 100644 --- a/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl +++ b/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl @@ -3,114 +3,112 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # mapping formula from a point (xi, eta) in reference space [-1,1]^2 to a point (x,y) # in physical coordinate space for a quadrilateral element with straight sides # Alg. 95 from the blue book of Kopriva function straight_side_quad_map(xi, eta, corner_points) + x = 0.25 * (corner_points[1, 1] * (1.0 - xi) * (1.0 - eta) + + corner_points[2, 1] * (1.0 + xi) * (1.0 - eta) + + corner_points[3, 1] * (1.0 + xi) * (1.0 + eta) + + corner_points[4, 1] * (1.0 - xi) * (1.0 + eta)) - x = (0.25 * ( corner_points[1,1] * (1.0 - xi) * (1.0 - eta) - + corner_points[2,1] * (1.0 + xi) * (1.0 - eta) - + corner_points[3,1] * (1.0 + xi) * (1.0 + eta) - + corner_points[4,1] * (1.0 - xi) * (1.0 + eta)) ) - - y = (0.25 * ( corner_points[1,2] * (1.0 - xi) * (1.0 - eta) - + corner_points[2,2] * (1.0 + xi) * (1.0 - eta) - + corner_points[3,2] * (1.0 + xi) * (1.0 + eta) - + corner_points[4,2] * (1.0 - xi) * (1.0 + eta)) ) + y = 0.25 * (corner_points[1, 2] * (1.0 - xi) * (1.0 - eta) + + corner_points[2, 2] * (1.0 + xi) * (1.0 - eta) + + corner_points[3, 2] * (1.0 + xi) * (1.0 + eta) + + corner_points[4, 2] * (1.0 - xi) * (1.0 + eta)) - return x, y + return x, y end - # Compute the metric terms for the straight sided quadrilateral mapping # Alg. 
100 from the blue book of Kopriva function straight_side_quad_map_metrics(xi, eta, corner_points) + X_xi = 0.25 * ((1.0 - eta) * (corner_points[2, 1] - corner_points[1, 1]) + + (1.0 + eta) * (corner_points[3, 1] - corner_points[4, 1])) - X_xi = ( 0.25 * ( (1.0 - eta) * (corner_points[2,1] - corner_points[1,1]) - + (1.0 + eta) * (corner_points[3,1] - corner_points[4,1])) ) + X_eta = 0.25 * ((1.0 - xi) * (corner_points[4, 1] - corner_points[1, 1]) + + (1.0 + xi) * (corner_points[3, 1] - corner_points[2, 1])) - X_eta = ( 0.25 * ( (1.0 - xi) * (corner_points[4,1] - corner_points[1,1]) - + (1.0 + xi) * (corner_points[3,1] - corner_points[2,1])) ) + Y_xi = 0.25 * ((1.0 - eta) * (corner_points[2, 2] - corner_points[1, 2]) + + (1.0 + eta) * (corner_points[3, 2] - corner_points[4, 2])) - Y_xi = ( 0.25 * ( (1.0 - eta) * (corner_points[2,2] - corner_points[1,2]) - + (1.0 + eta) * (corner_points[3,2] - corner_points[4,2])) ) + Y_eta = 0.25 * ((1.0 - xi) * (corner_points[4, 2] - corner_points[1, 2]) + + (1.0 + xi) * (corner_points[3, 2] - corner_points[2, 2])) - Y_eta = ( 0.25 * ( (1.0 - xi) * (corner_points[4,2] - corner_points[1,2]) - + (1.0 + xi) * (corner_points[3,2] - corner_points[2,2])) ) - - return X_xi, X_eta, Y_xi, Y_eta + return X_xi, X_eta, Y_xi, Y_eta end - # construct the (x,y) node coordinates in the volume of a straight sided element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, nodes, corners) - - for j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i ,j ,element] .= straight_side_quad_map(nodes[i], nodes[j], corners) - end - - return node_coordinates +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, + nodes, corners) + for j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, element] .= straight_side_quad_map(nodes[i], nodes[j], + corners) + end + + return node_coordinates end - # construct the metric terms for a straight sided element function calc_metric_terms!(jacobian_matrix, element, nodes, corners) - # storage format: - # jacobian_matrix[1,1,:,:,:] <- X_xi - # jacobian_matrix[1,2,:,:,:] <- X_eta - # jacobian_matrix[2,1,:,:,:] <- Y_xi - # jacobian_matrix[2,2,:,:,:] <- Y_eta - for j in eachindex(nodes), i in eachindex(nodes) - (jacobian_matrix[1, 1, i, j, element], - jacobian_matrix[1, 2, i, j, element], - jacobian_matrix[2, 1, i, j, element], - jacobian_matrix[2, 2, i, j, element]) = straight_side_quad_map_metrics(nodes[i], nodes[j], - corners) - end - - return jacobian_matrix + # storage format: + # jacobian_matrix[1,1,:,:,:] <- X_xi + # jacobian_matrix[1,2,:,:,:] <- X_eta + # jacobian_matrix[2,1,:,:,:] <- Y_xi + # jacobian_matrix[2,2,:,:,:] <- Y_eta + for j in eachindex(nodes), i in eachindex(nodes) + (jacobian_matrix[1, 1, i, j, element], + jacobian_matrix[1, 2, i, j, element], + jacobian_matrix[2, 1, i, j, element], + jacobian_matrix[2, 2, i, j, element]) = straight_side_quad_map_metrics(nodes[i], + nodes[j], + corners) + end + + return jacobian_matrix end - # construct the normal direction vectors (but not actually normalized) for a straight sided element # normalization occurs on the fly during the surface flux computation function calc_normal_directions!(normal_directions, element, nodes, corners) - # normal directions on the boundary for the left (local side 4) and right (local side 2) - for j in eachindex(nodes) - # side 2 - X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(1.0, nodes[j], corners) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - 
normal_directions[1, j, 2, element] = sign(Jtemp) * ( Y_eta ) - normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta ) - - # side 4 - X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(-1.0, nodes[j], corners) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, j, 4, element] = -sign(Jtemp) * ( Y_eta ) - normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta ) - end - - # normal directions on the boundary for the top (local side 3) and bottom (local side 1) - for i in eachindex(nodes) - # side 1 - X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], -1.0, corners) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi ) - normal_directions[2, i, 1, element] = -sign(Jtemp) * ( X_xi ) - - # side 3 - X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], 1.0, corners) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi ) - normal_directions[2, i, 3, element] = sign(Jtemp) * ( X_xi ) - end - - return normal_directions + # normal directions on the boundary for the left (local side 4) and right (local side 2) + for j in eachindex(nodes) + # side 2 + X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(1.0, nodes[j], + corners) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, j, 2, element] = sign(Jtemp) * (Y_eta) + normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta) + + # side 4 + X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(-1.0, nodes[j], + corners) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, j, 4, element] = -sign(Jtemp) * (Y_eta) + normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta) + end + + # normal directions on the boundary for the top (local side 3) and bottom (local side 1) + for i in eachindex(nodes) + # side 1 + X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], -1.0, + corners) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi) + normal_directions[2, i, 1, element] = -sign(Jtemp) * (X_xi) + + # side 3 + X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], 1.0, + corners) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi) + normal_directions[2, i, 3, element] = sign(Jtemp) * (X_xi) + end + + return normal_directions end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 5315e695bd6..cad5542aae3 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ UnstructuredSortedBoundaryTypes @@ -13,86 +13,88 @@ It stores a set of global indices for each boundary condition type to expedite c during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions set by the user in the elixir file is also stored for printing. """ -mutable struct UnstructuredSortedBoundaryTypes{N, BCs<:NTuple{N, Any}} - boundary_condition_types::BCs # specific boundary condition type(s), e.g. 
BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices - boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} + boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet + boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file end - # constructor that "eats" the original boundary condition dictionary and sorts the information # from the `UnstructuredBoundaryContainer2D` in cache.boundaries according to the boundary types # and stores the associated global boundary indexing in NTuple function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) - # extract the unique boundary function routines from the dictionary - boundary_condition_types = Tuple(unique(collect(values(boundary_conditions)))) - n_boundary_types = length(boundary_condition_types) - boundary_indices = ntuple(_ -> [], n_boundary_types) + # extract the unique boundary function routines from the dictionary + boundary_condition_types = Tuple(unique(collect(values(boundary_conditions)))) + n_boundary_types = length(boundary_condition_types) + boundary_indices = ntuple(_ -> [], n_boundary_types) - container = UnstructuredSortedBoundaryTypes{n_boundary_types, typeof(boundary_condition_types)}( - boundary_condition_types, boundary_indices, boundary_conditions) + container = UnstructuredSortedBoundaryTypes{n_boundary_types, + typeof(boundary_condition_types)}(boundary_condition_types, + boundary_indices, + boundary_conditions) - initialize!(container, cache) + initialize!(container, cache) end - -function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N}, cache) where N - @unpack boundary_dictionary, boundary_condition_types = boundary_types_container - - unique_names = unique(cache.boundaries.name) - - if mpi_isparallel() - # Exchange of boundaries names - send_buffer = Vector{UInt8}(join(unique_names, "\0")) - push!(send_buffer, 0) - if mpi_isroot() - recv_buffer_length = MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) - recv_buffer = Vector{UInt8}(undef, sum(recv_buffer_length)) - MPI.Gatherv!(send_buffer, MPI.VBuffer(recv_buffer, recv_buffer_length), mpi_root(), mpi_comm()) - all_names = unique(Symbol.(split(String(recv_buffer), "\0"; keepempty=false))) - for key in keys(boundary_dictionary) - if !(key in all_names) - println(stderr, "ERROR: Key $(repr(key)) is not a valid boundary name") - MPI.Abort(mpi_comm(), 1) +function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N}, + cache) where {N} + @unpack boundary_dictionary, boundary_condition_types = boundary_types_container + + unique_names = unique(cache.boundaries.name) + + if mpi_isparallel() + # Exchange of boundaries names + send_buffer = Vector{UInt8}(join(unique_names, "\0")) + push!(send_buffer, 0) + if mpi_isroot() + recv_buffer_length = MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) + recv_buffer = Vector{UInt8}(undef, sum(recv_buffer_length)) + MPI.Gatherv!(send_buffer, MPI.VBuffer(recv_buffer, recv_buffer_length), + mpi_root(), mpi_comm()) + all_names = unique(Symbol.(split(String(recv_buffer), "\0"; + keepempty = false))) + for key in keys(boundary_dictionary) + if !(key in all_names) + println(stderr, + "ERROR: Key $(repr(key)) 
is not a valid boundary name") + MPI.Abort(mpi_comm(), 1) + end + end + else + MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) + MPI.Gatherv!(send_buffer, nothing, mpi_root(), mpi_comm()) end - end else - MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) - MPI.Gatherv!(send_buffer, nothing, mpi_root(), mpi_comm()) - end - else - for key in keys(boundary_dictionary) - if !(key in unique_names) - error("Key $(repr(key)) is not a valid boundary name") - end + for key in keys(boundary_dictionary) + if !(key in unique_names) + error("Key $(repr(key)) is not a valid boundary name") + end + end end - end - # Verify that each boundary has a boundary condition - for name in unique_names - if name !== Symbol("---") && !haskey(boundary_dictionary, name) - error("No boundary condition specified for boundary $(repr(name))") + # Verify that each boundary has a boundary condition + for name in unique_names + if name !== Symbol("---") && !haskey(boundary_dictionary, name) + error("No boundary condition specified for boundary $(repr(name))") + end end - end - # pull and sort the indexing for each boundary type - _boundary_indices = Vector{Any}(nothing, N) - for j in 1:N - indices_for_current_type = Int[] - for (test_name, test_condition) in boundary_dictionary - temp_indices = findall(x->x===test_name, cache.boundaries.name) - if test_condition === boundary_condition_types[j] - indices_for_current_type = vcat(indices_for_current_type, temp_indices) - end + # pull and sort the indexing for each boundary type + _boundary_indices = Vector{Any}(nothing, N) + for j in 1:N + indices_for_current_type = Int[] + for (test_name, test_condition) in boundary_dictionary + temp_indices = findall(x -> x === test_name, cache.boundaries.name) + if test_condition === boundary_condition_types[j] + indices_for_current_type = vcat(indices_for_current_type, temp_indices) + end + end + _boundary_indices[j] = sort!(indices_for_current_type) end - _boundary_indices[j] = sort!(indices_for_current_type) - end - # convert the work array with the boundary indices into a tuple - boundary_types_container.boundary_indices = Tuple(_boundary_indices) + # convert the work array with the boundary indices into a tuple + boundary_types_container.boundary_indices = Tuple(_boundary_indices) - return boundary_types_container + return boundary_types_container end - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp.jl b/src/solvers/fdsbp_tree/fdsbp.jl index b89d59c1156..cbb6fd16243 100644 --- a/src/solvers/fdsbp_tree/fdsbp.jl +++ b/src/solvers/fdsbp_tree/fdsbp.jl @@ -6,7 +6,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ FDSBP(D_SBP; surface_integral, volume_integral) @@ -25,13 +25,13 @@ The other arguments have the same meaning as in [`DG`](@ref) or [`DGSEM`](@ref). !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. 
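A minimal construction sketch (the operator source, grid, and fluxes below are illustrative choices, not requirements of `FDSBP`):

```julia
using Trixi
using SummationByPartsOperators: derivative_operator, MattssonNordström2004

# first-derivative SBP operator with interior accuracy order 4 on 64 nodes
D_SBP = derivative_operator(MattssonNordström2004(),
                            derivative_order = 1, accuracy_order = 4,
                            xmin = 0.0, xmax = 1.0, N = 64)

solver = FDSBP(D_SBP,
               surface_integral = SurfaceIntegralStrongForm(flux_lax_friedrichs),
               volume_integral = VolumeIntegralStrongForm())
```

The resulting `solver` can then be used in place of a `DGSEM` solver when building a semidiscretization.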
""" -const FDSBP = DG{Basis} where {Basis<:AbstractDerivativeOperator} +const FDSBP = DG{Basis} where {Basis <: AbstractDerivativeOperator} function FDSBP(D_SBP::AbstractDerivativeOperator; surface_integral, volume_integral) - return DG(D_SBP, nothing #= mortar =#, surface_integral, volume_integral) + # `nothing` is passed as `mortar` + return DG(D_SBP, nothing, surface_integral, volume_integral) end - # General interface methods for SummationByPartsOperators.jl and Trixi.jl nnodes(D::AbstractDerivativeOperator) = size(D, 1) eachnode(D::AbstractDerivativeOperator) = Base.OneTo(nnodes(D)) @@ -42,7 +42,6 @@ get_nodes(D::AbstractDerivativeOperator) = grid(D) polydeg(D::AbstractDerivativeOperator) = size(D, 1) - 1 polydeg(fdsbp::FDSBP) = polydeg(fdsbp.basis) - # TODO: FD. No mortars supported at the moment init_mortars(cell_ids, mesh, elements, mortar::Nothing) = nothing create_cache(mesh, equations, mortar::Nothing, uEltype) = NamedTuple() @@ -50,25 +49,21 @@ nmortars(mortar::Nothing) = 0 function prolong2mortars!(cache, u, mesh, equations, mortar::Nothing, surface_integral, dg::DG) - @assert isempty(eachmortar(dg, cache)) + @assert isempty(eachmortar(dg, cache)) end function calc_mortar_flux!(surface_flux_values, mesh, nonconservative_terms, equations, mortar::Nothing, surface_integral, dg::DG, cache) - @assert isempty(eachmortar(dg, cache)) + @assert isempty(eachmortar(dg, cache)) end - # We do not use a specialized setup to analyze solutions SolutionAnalyzer(D::AbstractDerivativeOperator) = D - # dimension-specific implementations include("fdsbp_1d.jl") include("fdsbp_2d.jl") include("fdsbp_3d.jl") - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp_1d.jl b/src/solvers/fdsbp_tree/fdsbp_1d.jl index 2cd6edd04fd..c7712074940 100644 --- a/src/solvers/fdsbp_tree/fdsbp_1d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_1d.jl @@ -6,78 +6,79 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # 1D caches function create_cache(mesh::TreeMesh{1}, equations, volume_integral::VolumeIntegralStrongForm, dg, uEltype) + prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}(undef, + ntuple(_ -> nnodes(dg), + ndims(mesh))...) + f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}( - undef, ntuple(_ -> nnodes(dg), ndims(mesh))...) - f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; f_threaded,) + return (; f_threaded) end function create_cache(mesh::TreeMesh{1}, equations, volume_integral::VolumeIntegralUpwind, dg, uEltype) + u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), + Val{nvariables(equations)}())) + f = StructArray([(u_node, u_node)]) + f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) + for _ in 1:Threads.nthreads()] + + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) + f_minus_threaded = [f_minus] + f_plus_threaded = [f_plus] + for i in 2:Threads.nthreads() + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) + push!(f_minus_threaded, f_minus) + push!(f_plus_threaded, f_plus) + end - u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), Val{nvariables(equations)}())) - f = StructArray([(u_node, u_node)]) - f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) 
for _ in 1:Threads.nthreads()] - - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) - f_minus_threaded = [f_minus] - f_plus_threaded = [f_plus] - for i in 2:Threads.nthreads() - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) - push!(f_minus_threaded, f_minus) - push!(f_plus_threaded, f_plus) - end - - return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded,) + return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded) end - # 1D volume integral contributions for `VolumeIntegralStrongForm` function calc_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) - D = dg.basis # SBP derivative operator - @unpack f_threaded = cache - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - f_element = f_threaded[Threads.threadid()] - u_element = view(u_vectors, :, element) - - # x direction - @. f_element = flux(u_element, 1, equations) - mul!(view(du_vectors, :, element), D, view(f_element, :), - one(eltype(du)), one(eltype(du))) - end - - return nothing -end + D = dg.basis # SBP derivative operator + @unpack f_threaded = cache + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) + end + + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + f_element = f_threaded[Threads.threadid()] + u_element = view(u_vectors, :, element) + # x direction + @. f_element = flux(u_element, 1, equations) + mul!(view(du_vectors, :, element), D, view(f_element, :), + one(eltype(du)), one(eltype(du))) + end + + return nothing +end # 1D volume integral contributions for `VolumeIntegralUpwind`. 
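Before the implementation, a self-contained sketch of what a flux splitting provides, for scalar linear advection with speed `a` (hypothetical helper names, not Trixi's API):

```julia
# Split f(u) = a * u into a right-traveling part f^+ and a left-traveling
# part f^- with f == f^+ + f^-. In the upwind volume integral, f^+ is
# differentiated with the backward-biased operator D^- and f^- with the
# forward-biased operator D^+.
a = 2.0
fplus(u) = max(a, 0.0) * u    # carries information to the right
fminus(u) = min(a, 0.0) * u   # carries information to the left

u = 1.5
@assert fplus(u) + fminus(u) == a * u   # the split recovers the full flux
```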
# Note that the plus / minus notation of the operators does not refer to the @@ -91,78 +92,78 @@ function calc_volume_integral!(du, u, nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) - # Assume that - # dg.basis isa SummationByPartsOperators.UpwindOperators - D_minus = dg.basis.minus # Upwind SBP D^- derivative operator - D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator - @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache - @unpack splitting = volume_integral - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - # f_minus_plus_element wraps the storage provided by f_minus_element and - # f_plus_element such that we can use a single plain broadcasting below. - # f_minus_element and f_plus_element are updated in broadcasting calls - # of the form `@. f_minus_plus_element = ...`. - f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] - f_minus_element = f_minus_threaded[Threads.threadid()] - f_plus_element = f_plus_threaded[Threads.threadid()] - u_element = view(u_vectors, :, element) - - # x direction - @. f_minus_plus_element = splitting(u_element, 1, equations) - mul!(view(du_vectors, :, element), D_plus, view(f_minus_element, :), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, :, element), D_minus, view(f_plus_element, :), - one(eltype(du)), one(eltype(du))) - end - - return nothing -end + # Assume that + # dg.basis isa SummationByPartsOperators.UpwindOperators + D_minus = dg.basis.minus # Upwind SBP D^- derivative operator + D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator + @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache + @unpack splitting = volume_integral + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) + end + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. 
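The five-argument `mul!` used in these kernels accumulates into the destination, i.e. it computes `du += D * f` in place without temporaries; a standalone illustration with random stand-in data:

```julia
using LinearAlgebra

D = rand(4, 4)   # stand-in for an SBP derivative matrix
f = rand(4)      # flux values along one line of nodes
du = zeros(4)

mul!(du, D, f, 1.0, 1.0)   # du = 1.0 * (D * f) + 1.0 * du
@assert du ≈ D * f
```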
+ @threaded for element in eachelement(dg, cache) + # f_minus_plus_element wraps the storage provided by f_minus_element and + # f_plus_element such that we can use a single plain broadcasting below. + # f_minus_element and f_plus_element are updated in broadcasting calls + # of the form `@. f_minus_plus_element = ...`. + f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] + f_minus_element = f_minus_threaded[Threads.threadid()] + f_plus_element = f_plus_threaded[Threads.threadid()] + u_element = view(u_vectors, :, element) + + # x direction + @. f_minus_plus_element = splitting(u_element, 1, equations) + mul!(view(du_vectors, :, element), D_plus, view(f_minus_element, :), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, :, element), D_minus, view(f_plus_element, :), + one(eltype(du)), one(eltype(du))) + end + + return nothing +end function calc_surface_integral!(du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - - @threaded for element in eachelement(dg, cache) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), element) - end - - return nothing -end + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + + @threaded for element in eachelement(dg, cache) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), element) + end + return nothing +end # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann solver). 
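To make the coupling directions concrete, toy numbers for a single interface with right-going speed `a = 2`, so the left-traveling part vanishes (illustrative values only, not Trixi's API):

```julia
u_ll, u_rr = 1.0, 3.0         # states left and right of the interface

flux_plus_ll = 2.0 * u_ll     # f^+(u_ll): right-traveling information,
                              # stored on the right element's -x side
flux_minus_rr = 0.0 * u_rr    # f^-(u_rr): left-traveling information,
                              # stored on the left element's +x side
```

For a purely right-going wave, the interface state is thus determined entirely by the left ("upwind") element, as expected.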
The flux splitting @@ -174,39 +175,39 @@ function calc_interface_flux!(surface_flux_values, nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - @unpack splitting = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Pull the left and right solution data - u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) - - # Compute the upwind coupling terms where right-traveling - # information comes from the left and left-traveling information - # comes from the right - flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], equations) - flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) - - # Save the upwind coupling into the appropriate side of the elements - for v in eachvariable(equations) - surface_flux_values[v, left_direction, left_id] = flux_minus_rr[v] - surface_flux_values[v, right_direction, right_id] = flux_plus_ll[v] + @unpack splitting = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Pull the left and right solution data + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) + + # Compute the upwind coupling terms where right-traveling + # information comes from the left and left-traveling information + # comes from the right + flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], + equations) + flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) + + # Save the upwind coupling into the appropriate side of the elements + for v in eachvariable(equations) + surface_flux_values[v, left_direction, left_id] = flux_minus_rr[v] + surface_flux_values[v, right_direction, right_id] = flux_plus_ll[v] + end end - end - return nothing + return nothing end - # Implementation of fully upwind SATs. The surface flux values are pre-computed # in the specialized `calc_interface_flux` routine. 
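A scalar sketch of such a penalty on one element (assumed notation: `hL` and `hR` are the boundary weights of the diagonal SBP norm; the structure mirrors the function that follows):

```julia
hL, hR = 0.5, 0.5             # boundary quadrature weights of the SBP norm
f_node_L, f_num_L = 2.0, 1.8  # interior f^+ and coupling flux at the -x surface
f_node_R, f_num_R = 4.0, 4.1  # interior f^- and coupling flux at the +x surface

du = zeros(4)                 # rate of change on a 4-node element
du[1] += (1 / hL) * -(f_num_L - f_node_L)     # SAT at the -x surface
du[end] += (1 / hR) * +(f_num_R - f_node_R)   # SAT at the +x surface
```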
These SATs are still of # a strong form penalty type, except that the interior flux at a particular @@ -214,90 +215,88 @@ end function calc_surface_integral!(du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - @unpack splitting = surface_integral - - @threaded for element in eachelement(dg, cache) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, element) - f_node = splitting(u_node, Val{:plus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), element) - f_node = splitting(u_node, Val{:minus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), element) - end - - return nothing -end + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + @unpack splitting = surface_integral + + @threaded for element in eachelement(dg, cache) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, element) + f_node = splitting(u_node, Val{:plus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), element) + f_node = splitting(u_node, Val{:minus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), element) + end + return nothing +end # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{1}, equations, - dg::FDSBP, cache, args...; normalize=true) where {Func} - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * func(u, i, element, equations, dg, args...) + dg::FDSBP, cache, args...; normalize = true) where {Func} + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * + func(u, i, element, equations, dg, args...) 
+ end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{1}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - @unpack node_coordinates = cache.elements - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Calculate errors at each node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for i in eachnode(analyzer) - u_exact = initial_condition( - get_node_coords(node_coordinates, equations, dg, i, element), t, equations) - diff = func(u_exact, equations) - func( - get_node_vars(u, equations, dg, i, element), equations) - l2_error += diff.^2 * (weights[i] * volume_jacobian_) - linf_error = @. max(linf_error, abs(diff)) + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + @unpack node_coordinates = cache.elements + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Calculate errors at each node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(node_coordinates, equations, dg, + i, element), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u, equations, dg, i, element), equations) + l2_error += diff .^ 2 * (weights[i] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index 295c0cada2a..241e0d95342 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -6,88 +6,89 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # 2D caches function create_cache(mesh::TreeMesh{2}, equations, volume_integral::VolumeIntegralStrongForm, dg, uEltype) + prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}(undef, + ntuple(_ -> nnodes(dg), + ndims(mesh))...) + f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}( - undef, ntuple(_ -> nnodes(dg), ndims(mesh))...) 
- f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; f_threaded,) + return (; f_threaded) end function create_cache(mesh::TreeMesh{2}, equations, volume_integral::VolumeIntegralUpwind, dg, uEltype) + u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), + Val{nvariables(equations)}())) + f = StructArray([(u_node, u_node)]) + f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) + for _ in 1:Threads.nthreads()] + + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) + f_minus_threaded = [f_minus] + f_plus_threaded = [f_plus] + for i in 2:Threads.nthreads() + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) + push!(f_minus_threaded, f_minus) + push!(f_plus_threaded, f_plus) + end - u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), Val{nvariables(equations)}())) - f = StructArray([(u_node, u_node)]) - f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) for _ in 1:Threads.nthreads()] - - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) - f_minus_threaded = [f_minus] - f_plus_threaded = [f_plus] - for i in 2:Threads.nthreads() - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) - push!(f_minus_threaded, f_minus) - push!(f_plus_threaded, f_plus) - end - - return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded,) + return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded) end - # 2D volume integral contributions for `VolumeIntegralStrongForm` function calc_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) - D = dg.basis # SBP derivative operator - @unpack f_threaded = cache - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - f_element = f_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, element) - - # x direction - @. f_element = flux(u_element, 1, equations) - for j in eachnode(dg) - mul!(view(du_vectors, :, j, element), D, view(f_element, :, j), - one(eltype(du)), one(eltype(du))) + D = dg.basis # SBP derivative operator + @unpack f_threaded = cache + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. 
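The `reinterpret(reshape, ...)` trick used in these kernels can be checked in isolation; a minimal sketch (array sizes are arbitrary):

```julia
using StaticArrays

u = rand(3, 5, 2)   # (variable, node, element) layout with 3 variables
u_vectors = reinterpret(reshape, SVector{3, Float64}, u)

@assert size(u_vectors) == (5, 2)   # leading variable dimension is absorbed
u_vectors[1, 1]                     # SVector{3, Float64}: all variables at one node
```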
+ u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # y direction - @. f_element = flux(u_element, 2, equations) - for i in eachnode(dg) - mul!(view(du_vectors, i, :, element), D, view(f_element, i, :), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + f_element = f_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, element) + + # x direction + @. f_element = flux(u_element, 1, equations) + for j in eachnode(dg) + mul!(view(du_vectors, :, j, element), D, view(f_element, :, j), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. f_element = flux(u_element, 2, equations) + for i in eachnode(dg) + mul!(view(du_vectors, i, :, element), D, view(f_element, i, :), + one(eltype(du)), one(eltype(du))) + end end - end - return nothing + return nothing end - # 2D volume integral contributions for `VolumeIntegralUpwind`. # Note that the plus / minus notation of the operators does not refer to the # upwind / downwind directions of the fluxes. @@ -100,106 +101,106 @@ function calc_volume_integral!(du, u, nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) - # Assume that - # dg.basis isa SummationByPartsOperators.UpwindOperators - D_minus = dg.basis.minus # Upwind SBP D^- derivative operator - D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator - @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache - @unpack splitting = volume_integral - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - # f_minus_plus_element wraps the storage provided by f_minus_element and - # f_plus_element such that we can use a single plain broadcasting below. - # f_minus_element and f_plus_element are updated in broadcasting calls - # of the form `@. f_minus_plus_element = ...`. - f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] - f_minus_element = f_minus_threaded[Threads.threadid()] - f_plus_element = f_plus_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, element) - - # x direction - @. 
f_minus_plus_element = splitting(u_element, 1, equations) - for j in eachnode(dg) - mul!(view(du_vectors, :, j, element), D_minus, view(f_plus_element, :, j), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, :, j, element), D_plus, view(f_minus_element, :, j), - one(eltype(du)), one(eltype(du))) + # Assume that + # dg.basis isa SummationByPartsOperators.UpwindOperators + D_minus = dg.basis.minus # Upwind SBP D^- derivative operator + D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator + @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache + @unpack splitting = volume_integral + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # y direction - @. f_minus_plus_element = splitting(u_element, 2, equations) - for i in eachnode(dg) - mul!(view(du_vectors, i, :, element), D_minus, view(f_plus_element, i, :), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, i, :, element), D_plus, view(f_minus_element, i, :), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + # f_minus_plus_element wraps the storage provided by f_minus_element and + # f_plus_element such that we can use a single plain broadcasting below. + # f_minus_element and f_plus_element are updated in broadcasting calls + # of the form `@. f_minus_plus_element = ...`. + f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] + f_minus_element = f_minus_threaded[Threads.threadid()] + f_plus_element = f_plus_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, element) + + # x direction + @. f_minus_plus_element = splitting(u_element, 1, equations) + for j in eachnode(dg) + mul!(view(du_vectors, :, j, element), D_minus, view(f_plus_element, :, j), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, :, j, element), D_plus, view(f_minus_element, :, j), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. 
f_minus_plus_element = splitting(u_element, 2, equations) + for i in eachnode(dg) + mul!(view(du_vectors, i, :, element), D_minus, view(f_plus_element, i, :), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, i, :, element), D_plus, view(f_minus_element, i, :), + one(eltype(du)), one(eltype(du))) + end end - end - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, element) + + # surface at -y + u_node = get_node_vars(u, equations, dg, l, 1, element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), element) + end end - end - return nothing + return nothing end - # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann 
solver). The flux splitting # already separates the solution information into right-traveling and @@ -210,42 +211,43 @@ function calc_interface_flux!(surface_flux_values, nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - @unpack splitting = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Pull the left and right solution data - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - - # Compute the upwind coupling terms where right-traveling - # information comes from the left and left-traveling information - # comes from the right - flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], equations) - flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) - - # Save the upwind coupling into the appropriate side of the elements - for v in eachvariable(equations) - surface_flux_values[v, i, left_direction, left_id] = flux_minus_rr[v] - surface_flux_values[v, i, right_direction, right_id] = flux_plus_ll[v] - end + @unpack splitting = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Pull the left and right solution data + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + + # Compute the upwind coupling terms where right-traveling + # information comes from the left and left-traveling information + # comes from the right + flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], + equations) + flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], + equations) + + # Save the upwind coupling into the appropriate side of the elements + for v in eachvariable(equations) + surface_flux_values[v, i, left_direction, left_id] = flux_minus_rr[v] + surface_flux_values[v, i, right_direction, right_id] = flux_plus_ll[v] + end + end end - end - return nothing + return nothing end - # Implementation of fully upwind SATs. The surface flux values are pre-computed # in the specialized `calc_interface_flux` routine. 
These SATs are still of # a strong form penalty type, except that the interior flux at a particular @@ -253,106 +255,103 @@ end function calc_surface_integral!(du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - @unpack splitting = surface_integral - - - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, element) - f_node = splitting(u_node, Val{:plus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) - f_node = splitting(u_node, Val{:minus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, element) - f_node = splitting(u_node, Val{:plus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) - f_node = splitting(u_node, Val{:minus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + @unpack splitting = surface_integral + + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, element) + f_node = splitting(u_node, Val{:plus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) + f_node = splitting(u_node, Val{:minus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, element) + + # surface at -y + u_node = get_node_vars(u, equations, dg, l, 1, element) + f_node = splitting(u_node, Val{:plus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) + f_node = splitting(u_node, Val{:minus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), element) + end end - end - return nothing + return nothing end - # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{2}, equations, - dg::FDSBP, cache, args...; 
normalize=true) where {Func} - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) + dg::FDSBP, cache, args...; normalize = true) where {Func} + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{2}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - @unpack node_coordinates = cache.elements - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Calculate errors at each node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition( - get_node_coords(node_coordinates, equations, dg, i, j, element), t, equations) - diff = func(u_exact, equations) - func( - get_node_vars(u, equations, dg, i, j, element), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * volume_jacobian_) - linf_error = @. max(linf_error, abs(diff)) + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + @unpack node_coordinates = cache.elements + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Calculate errors at each node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(node_coordinates, equations, dg, + i, j, element), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u, equations, dg, i, j, element), equations) + l2_error += diff .^ 2 * (weights[i] * weights[j] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. 
sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index ed5f8b102a3..a4f69d3d481 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -6,95 +6,96 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # 3D caches function create_cache(mesh::TreeMesh{3}, equations, volume_integral::VolumeIntegralStrongForm, dg, uEltype) + prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}(undef, + ntuple(_ -> nnodes(dg), + ndims(mesh))...) + f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}( - undef, ntuple(_ -> nnodes(dg), ndims(mesh))...) - f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; f_threaded,) + return (; f_threaded) end function create_cache(mesh::TreeMesh{3}, equations, volume_integral::VolumeIntegralUpwind, dg, uEltype) + u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), + Val{nvariables(equations)}())) + f = StructArray([(u_node, u_node)]) + f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) + for _ in 1:Threads.nthreads()] + + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) + f_minus_threaded = [f_minus] + f_plus_threaded = [f_plus] + for i in 2:Threads.nthreads() + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) + push!(f_minus_threaded, f_minus) + push!(f_plus_threaded, f_plus) + end - u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), Val{nvariables(equations)}())) - f = StructArray([(u_node, u_node)]) - f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) for _ in 1:Threads.nthreads()] - - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) - f_minus_threaded = [f_minus] - f_plus_threaded = [f_plus] - for i in 2:Threads.nthreads() - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) - push!(f_minus_threaded, f_minus) - push!(f_plus_threaded, f_plus) - end - - return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded,) + return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded) end - # 3D volume integral contributions for `VolumeIntegralStrongForm` function calc_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) - D = dg.basis # SBP derivative operator - @unpack f_threaded = cache - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. 
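# --- Editor's aside (illustrative sketch, not part of this commit) -----------
# The `reinterpret(reshape, ...)` behavior noted in the comment above, in
# isolation. Names with a `_demo` suffix are made up for this sketch:
using StaticArrays
u_demo = rand(3, 4, 4, 2)  # layout (variable, node i, node j, element)
uv_demo = reinterpret(reshape, SVector{3, Float64}, u_demo)
@assert size(uv_demo) == (4, 4, 2)  # leading dimension of size 3 is consumed
u1_demo = rand(1, 4, 4, 2)
uv1_demo = reinterpret(reshape, SVector{1, Float64}, u1_demo)
@assert size(uv1_demo) == (1, 4, 4, 2)  # equal element sizes: nothing dropped,
# which is why the code keeps an explicit `reshape` branch for one variable.
# --- end aside ----------------------------------------------------------------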
- u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - f_element = f_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, :, element) - - # x direction - @. f_element = flux(u_element, 1, equations) - for j in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, :, j, k, element), D, view(f_element, :, j, k), - one(eltype(du)), one(eltype(du))) - end - - # y direction - @. f_element = flux(u_element, 2, equations) - for i in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, i, :, k, element), D, view(f_element, i, :, k), - one(eltype(du)), one(eltype(du))) + D = dg.basis # SBP derivative operator + @unpack f_threaded = cache + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # z direction - @. f_element = flux(u_element, 3, equations) - for i in eachnode(dg), j in eachnode(dg) - mul!(view(du_vectors, i, j, :, element), D, view(f_element, i, j, :), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + f_element = f_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, :, element) + + # x direction + @. f_element = flux(u_element, 1, equations) + for j in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, :, j, k, element), D, view(f_element, :, j, k), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. f_element = flux(u_element, 2, equations) + for i in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, i, :, k, element), D, view(f_element, i, :, k), + one(eltype(du)), one(eltype(du))) + end + + # z direction + @. f_element = flux(u_element, 3, equations) + for i in eachnode(dg), j in eachnode(dg) + mul!(view(du_vectors, i, j, :, element), D, view(f_element, i, j, :), + one(eltype(du)), one(eltype(du))) + end end - end - return nothing + return nothing end - # 3D volume integral contributions for `VolumeIntegralUpwind`. # Note that the plus / minus notation of the operators does not refer to the # upwind / downwind directions of the fluxes. 
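# --- Editor's aside (illustrative sketch, not part of this commit) -----------
# The upwind volume integral below assumes a flux splitting
#   f(u) = f^+(u) + f^-(u),
# where f^+ carries right-traveling and f^- left-traveling information; the
# code then applies D^- to f^+ and D^+ to f^-. A minimal consistency check for
# linear advection (function names here are illustrative, not Trixi.jl API):
a = 1.0  # advection speed
f_plus(u) = 0.5 * (a + abs(a)) * u   # right-traveling part (zero if a < 0)
f_minus(u) = 0.5 * (a - abs(a)) * u  # left-traveling part (zero if a > 0)
@assert f_plus(2.0) + f_minus(2.0) ≈ a * 2.0  # splitting recovers the full flux
# --- end aside ----------------------------------------------------------------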
@@ -107,129 +108,135 @@ function calc_volume_integral!(du, u, nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) - # Assume that - # dg.basis isa SummationByPartsOperators.UpwindOperators - D_minus = dg.basis.minus # Upwind SBP D^- derivative operator - D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator - @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache - @unpack splitting = volume_integral - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - # f_minus_plus_element wraps the storage provided by f_minus_element and - # f_plus_element such that we can use a single plain broadcasting below. - # f_minus_element and f_plus_element are updated in broadcasting calls - # of the form `@. f_minus_plus_element = ...`. - f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] - f_minus_element = f_minus_threaded[Threads.threadid()] - f_plus_element = f_plus_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, :, element) - - # x direction - @. f_minus_plus_element = splitting(u_element, 1, equations) - for j in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, :, j, k, element), D_minus, view(f_plus_element, :, j, k), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, :, j, k, element), D_plus, view(f_minus_element, :, j, k), - one(eltype(du)), one(eltype(du))) + # Assume that + # dg.basis isa SummationByPartsOperators.UpwindOperators + D_minus = dg.basis.minus # Upwind SBP D^- derivative operator + D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator + @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache + @unpack splitting = volume_integral + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # y direction - @. 
f_minus_plus_element = splitting(u_element, 2, equations) - for i in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, i, :, k, element), D_minus, view(f_plus_element, i, :, k), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, i, :, k, element), D_plus, view(f_minus_element, i, :, k), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + # f_minus_plus_element wraps the storage provided by f_minus_element and + # f_plus_element such that we can use a single plain broadcasting below. + # f_minus_element and f_plus_element are updated in broadcasting calls + # of the form `@. f_minus_plus_element = ...`. + f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] + f_minus_element = f_minus_threaded[Threads.threadid()] + f_plus_element = f_plus_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, :, element) + + # x direction + @. f_minus_plus_element = splitting(u_element, 1, equations) + for j in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, :, j, k, element), D_minus, + view(f_plus_element, :, j, k), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, :, j, k, element), D_plus, + view(f_minus_element, :, j, k), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. f_minus_plus_element = splitting(u_element, 2, equations) + for i in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, i, :, k, element), D_minus, + view(f_plus_element, i, :, k), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, i, :, k, element), D_plus, + view(f_minus_element, i, :, k), + one(eltype(du)), one(eltype(du))) + end + + # z direction + @. f_minus_plus_element = splitting(u_element, 3, equations) + for i in eachnode(dg), j in eachnode(dg) + mul!(view(du_vectors, i, j, :, element), D_minus, + view(f_plus_element, i, j, :), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, i, j, :, element), D_plus, + view(f_minus_element, i, j, :), + one(eltype(du)), one(eltype(du))) + end end - # z direction - @. 
f_minus_plus_element = splitting(u_element, 3, equations) - for i in eachnode(dg), j in eachnode(dg) - mul!(view(du_vectors, i, j, :, element), D_minus, view(f_plus_element, i, j, :), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, i, j, :, element), D_plus, view(f_minus_element, i, j, :), - one(eltype(du)), one(eltype(du))) - end - end - - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, m, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, m, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, m, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, m, element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, m, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), m, element) - - # surface at -z - u_node = get_node_vars(u, equations, dg, l, m, 1, element) - f_node = flux(u_node, 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, m, 1, element) - - # surface at +z - u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) - f_node = flux(u_node, 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, m ,nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, m, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, m, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, m, element) + + # surface at -y + u_node = 
get_node_vars(u, equations, dg, l, 1, m, element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, m, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), m, element) + + # surface at -z + u_node = get_node_vars(u, equations, dg, l, m, 1, element) + f_node = flux(u_node, 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, m, 1, element) + + # surface at +z + u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) + f_node = flux(u_node, 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, m, nnodes(dg), element) + end end - end - return nothing + return nothing end - # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann solver). The flux splitting # already separates the solution information into right-traveling and @@ -240,43 +247,44 @@ function calc_interface_flux!(surface_flux_values, nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - @unpack splitting = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for j in eachnode(dg), i in eachnode(dg) - # Pull the left and right solution data - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) - - # Compute the upwind coupling terms where right-traveling - # information comes from the left and left-traveling information - # comes from the right - flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], equations) - flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) - - # Save the upwind coupling into the appropriate side of the elements - for v in eachvariable(equations) - surface_flux_values[v, i, j, left_direction, left_id] = flux_minus_rr[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux_plus_ll[v] - end + @unpack splitting = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for j in eachnode(dg), i in eachnode(dg) 
+ # Pull the left and right solution data + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) + + # Compute the upwind coupling terms where right-traveling + # information comes from the left and left-traveling information + # comes from the right + flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], + equations) + flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], + equations) + + # Save the upwind coupling into the appropriate side of the elements + for v in eachvariable(equations) + surface_flux_values[v, i, j, left_direction, left_id] = flux_minus_rr[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux_plus_ll[v] + end + end end - end - return nothing + return nothing end - # Implementation of fully upwind SATs. The surface flux values are pre-computed # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular @@ -284,121 +292,119 @@ end function calc_surface_integral!(du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - @unpack splitting = surface_integral - - - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, m, element) - f_node = splitting(u_node, Val{:plus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, m, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) - f_node = splitting(u_node, Val{:minus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, m, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, m, element) - f_node = splitting(u_node, Val{:plus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, m, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) - f_node = splitting(u_node, Val{:minus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), m, element) - - # surface at -z - u_node = get_node_vars(u, equations, dg, l, m, 1, element) - f_node = splitting(u_node, Val{:plus}(), 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, m, 1, element) - - # surface at +z - u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) - f_node = splitting(u_node, Val{:minus}(), 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, m, nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack 
surface_flux_values = cache.elements + @unpack splitting = surface_integral + + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, m, element) + f_node = splitting(u_node, Val{:plus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, m, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) + f_node = splitting(u_node, Val{:minus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, m, element) + + # surface at -y + u_node = get_node_vars(u, equations, dg, l, 1, m, element) + f_node = splitting(u_node, Val{:plus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, m, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) + f_node = splitting(u_node, Val{:minus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), m, element) + + # surface at -z + u_node = get_node_vars(u, equations, dg, l, m, 1, element) + f_node = splitting(u_node, Val{:plus}(), 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, m, 1, element) + + # surface at +z + u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) + f_node = splitting(u_node, Val{:minus}(), 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, m, nnodes(dg), element) + end end - end - return nothing + return nothing end - # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{3}, equations, - dg::FDSBP, cache, args...; normalize=true) where {Func} - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) + dg::FDSBP, cache, args...; normalize = true) where {Func} + # TODO: FD. This is rather inefficient right now and allocates... 
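# --- Editor's aside (illustrative sketch, not part of this commit) -----------
# For a diagonal-norm SBP operator, `diag(mass_matrix(D))` yields quadrature
# weights w with ∫ f dx ≈ J * sum(w[i] * f(x[i])) in 1D; the 3D loop below
# uses the tensor product weights[i] * weights[j] * weights[k]. A hand-rolled
# stand-in with trapezoidal weights (the lowest-order diagonal SBP norm):
x_demo = range(0.0, 1.0, length = 9)
w_demo = fill(step(x_demo), length(x_demo)); w_demo[1] /= 2; w_demo[end] /= 2
sum(w_demo[i] * sin(pi * x_demo[i]) for i in eachindex(x_demo))  # ≈ 2/pi
# --- end aside ----------------------------------------------------------------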
+ weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{3}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - @unpack node_coordinates = cache.elements - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Calculate errors at each node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition( - get_node_coords(node_coordinates, equations, dg, i, j, k, element), t, equations) - diff = func(u_exact, equations) - func( - get_node_vars(u, equations, dg, i, j, k, element), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * volume_jacobian_) - linf_error = @. max(linf_error, abs(diff)) + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + @unpack node_coordinates = cache.elements + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Calculate errors at each node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(node_coordinates, equations, dg, + i, j, k, element), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u, equations, dg, i, j, k, element), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - - end # @muladd diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index 465e051482c..a39f7cb1751 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -3,11 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! 
format: noindent # define types for parabolic solvers include("solvers_parabolic.jl") include("dg.jl") include("dgmulti.jl") - end # @muladd diff --git a/src/solvers/solvers_parabolic.jl b/src/solvers/solvers_parabolic.jl index f253cdbd03d..ed095cd675f 100644 --- a/src/solvers/solvers_parabolic.jl +++ b/src/solvers/solvers_parabolic.jl @@ -25,7 +25,7 @@ the LDG solver is equivalent to [`ViscousFormulationBassiRebay1`](@ref) with an [DOI: 10.1137/S0036142997316712](https://doi.org/10.1137/S0036142997316712) """ struct ViscousFormulationLocalDG{P} - penalty_parameter::P + penalty_parameter::P end -default_parabolic_solver() = ViscousFormulationBassiRebay1() \ No newline at end of file +default_parabolic_solver() = ViscousFormulationBassiRebay1() diff --git a/src/time_integration/methods_2N.jl b/src/time_integration/methods_2N.jl index b6fbdd939c2..557e8272128 100644 --- a/src/time_integration/methods_2N.jl +++ b/src/time_integration/methods_2N.jl @@ -3,12 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Abstract base type for time integration schemes of storage class `2N` abstract type SimpleAlgorithm2N end - """ CarpenterKennedy2N54() @@ -20,169 +19,176 @@ the low-storage explicit Runge-Kutta method of using the same interface as OrdinaryDiffEq.jl. """ struct CarpenterKennedy2N54 <: SimpleAlgorithm2N - a::SVector{5, Float64} - b::SVector{5, Float64} - c::SVector{5, Float64} - - function CarpenterKennedy2N54() - a = SVector(0.0, 567301805773.0 / 1357537059087.0,2404267990393.0 / 2016746695238.0, - 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) - b = SVector(1432997174477.0 / 9575080441755.0, 5161836677717.0 / 13612068292357.0, - 1720146321549.0 / 2090206949498.0, 3134564353537.0 / 4481467310338.0, - 2277821191437.0 / 14882151754819.0) - c = SVector(0.0, 1432997174477.0 / 9575080441755.0, 2526269341429.0 / 6820363962896.0, - 2006345519317.0 / 3224310063776.0, 2802321613138.0 / 2924317926251.0) - - new(a, b, c) - end + a::SVector{5, Float64} + b::SVector{5, Float64} + c::SVector{5, Float64} + + function CarpenterKennedy2N54() + a = SVector(0.0, 567301805773.0 / 1357537059087.0, + 2404267990393.0 / 2016746695238.0, + 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) + b = SVector(1432997174477.0 / 9575080441755.0, + 5161836677717.0 / 13612068292357.0, + 1720146321549.0 / 2090206949498.0, + 3134564353537.0 / 4481467310338.0, + 2277821191437.0 / 14882151754819.0) + c = SVector(0.0, 1432997174477.0 / 9575080441755.0, + 2526269341429.0 / 6820363962896.0, + 2006345519317.0 / 3224310063776.0, + 2802321613138.0 / 2924317926251.0) + + new(a, b, c) + end end - """ CarpenterKennedy2N43() Carpenter, Kennedy (1994) Third order 2N storage RK schemes with error control """ struct CarpenterKennedy2N43 <: SimpleAlgorithm2N - a::SVector{4, Float64} - b::SVector{4, Float64} - c::SVector{4, Float64} + a::SVector{4, Float64} + b::SVector{4, Float64} + c::SVector{4, Float64} - function CarpenterKennedy2N43() - a = SVector(0, 756391 / 934407, 36441873 / 15625000, 1953125 / 1085297) - b = SVector(8 / 141, 6627 / 2000, 609375 / 1085297, 198961 / 526383) - c = SVector(0, 8 / 141, 86 / 125, 1) + function CarpenterKennedy2N43() + a = SVector(0, 756391 / 934407, 36441873 / 15625000, 1953125 / 1085297) + b = SVector(8 / 141, 6627 / 2000, 609375 / 1085297, 198961 / 526383) + c = SVector(0, 8 / 141, 86 / 125, 1) - new(a, b, c) - end + new(a, b, c) + end end - # This struct is needed 
to fake https://github.com/SciML/OrdinaryDiffEq.jl/blob/0c2048a502101647ac35faabd80da8a5645beac7/src/integrators/type.jl#L1 mutable struct SimpleIntegrator2NOptions{Callback} - callback::Callback # callbacks; used in Trixi.jl - adaptive::Bool # whether the algorithm is adaptive; ignored - dtmax::Float64 # ignored - maxiters::Int # maximal number of time steps - tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored + callback::Callback # callbacks; used in Trixi.jl + adaptive::Bool # whether the algorithm is adaptive; ignored + dtmax::Float64 # ignored + maxiters::Int # maximal number of time steps + tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored end -function SimpleIntegrator2NOptions(callback, tspan; maxiters=typemax(Int), kwargs...) - SimpleIntegrator2NOptions{typeof(callback)}( - callback, false, Inf, maxiters, [last(tspan)]) +function SimpleIntegrator2NOptions(callback, tspan; maxiters = typemax(Int), kwargs...) + SimpleIntegrator2NOptions{typeof(callback)}(callback, false, Inf, maxiters, + [last(tspan)]) end # This struct is needed to fake https://github.com/SciML/OrdinaryDiffEq.jl/blob/0c2048a502101647ac35faabd80da8a5645beac7/src/integrators/type.jl#L77 # This implements the interface components described at # https://diffeq.sciml.ai/v6.8/basics/integrator/#Handing-Integrators-1 # which are used in Trixi.jl. -mutable struct SimpleIntegrator2N{RealT<:Real, uType, Params, Sol, F, Alg, SimpleIntegrator2NOptions} - u::uType # - du::uType - u_tmp::uType - t::RealT - dt::RealT # current time step - dtcache::RealT # ignored - iter::Int # current number of time steps (iteration) - p::Params # will be the semidiscretization from Trixi.jl - sol::Sol # faked - f::F - alg::Alg - opts::SimpleIntegrator2NOptions - finalstep::Bool # added for convenience +mutable struct SimpleIntegrator2N{RealT <: Real, uType, Params, Sol, F, Alg, + SimpleIntegrator2NOptions} + u::uType # + du::uType + u_tmp::uType + t::RealT + dt::RealT # current time step + dtcache::RealT # ignored + iter::Int # current number of time steps (iteration) + p::Params # will be the semidiscretization from Trixi.jl + sol::Sol # faked + f::F + alg::Alg + opts::SimpleIntegrator2NOptions + finalstep::Bool # added for convenience end # Forward integrator.stats.naccept to integrator.iter (see GitHub PR#771) function Base.getproperty(integrator::SimpleIntegrator2N, field::Symbol) - if field === :stats - return (naccept = getfield(integrator, :iter),) - end - # general fallback - return getfield(integrator, field) + if field === :stats + return (naccept = getfield(integrator, :iter),) + end + # general fallback + return getfield(integrator, field) end # Fakes `solve`: https://diffeq.sciml.ai/v6.8/basics/overview/#Solving-the-Problems-1 function solve(ode::ODEProblem, alg::T; - dt, callback=nothing, kwargs...) where {T<:SimpleAlgorithm2N} - u = copy(ode.u0) - du = similar(u) - u_tmp = similar(u) - t = first(ode.tspan) - iter = 0 - integrator = SimpleIntegrator2N(u, du, u_tmp, t, dt, zero(dt), iter, ode.p, - (prob=ode,), ode.f, alg, - SimpleIntegrator2NOptions(callback, ode.tspan; kwargs...), false) - - # initialize callbacks - if callback isa CallbackSet - for cb in callback.continuous_callbacks - error("unsupported") - end - for cb in callback.discrete_callbacks - cb.initialize(cb, integrator.u, integrator.t, integrator) + dt, callback = nothing, kwargs...) 
where {T <: SimpleAlgorithm2N} + u = copy(ode.u0) + du = similar(u) + u_tmp = similar(u) + t = first(ode.tspan) + iter = 0 + integrator = SimpleIntegrator2N(u, du, u_tmp, t, dt, zero(dt), iter, ode.p, + (prob = ode,), ode.f, alg, + SimpleIntegrator2NOptions(callback, ode.tspan; + kwargs...), false) + + # initialize callbacks + if callback isa CallbackSet + for cb in callback.continuous_callbacks + error("unsupported") + end + for cb in callback.discrete_callbacks + cb.initialize(cb, integrator.u, integrator.t, integrator) + end + elseif !isnothing(callback) + error("unsupported") end - elseif !isnothing(callback) - error("unsupported") - end - solve!(integrator) + solve!(integrator) end function solve!(integrator::SimpleIntegrator2N) - @unpack prob = integrator.sol - @unpack alg = integrator - t_end = last(prob.tspan) - callbacks = integrator.opts.callback - - integrator.finalstep = false - @trixi_timeit timer() "main loop" while !integrator.finalstep - if isnan(integrator.dt) - error("time step size `dt` is NaN") - end + @unpack prob = integrator.sol + @unpack alg = integrator + t_end = last(prob.tspan) + callbacks = integrator.opts.callback + + integrator.finalstep = false + @trixi_timeit timer() "main loop" while !integrator.finalstep + if isnan(integrator.dt) + error("time step size `dt` is NaN") + end - # if the next iteration would push the simulation beyond the end time, set dt accordingly - if integrator.t + integrator.dt > t_end || isapprox(integrator.t + integrator.dt, t_end) - integrator.dt = t_end - integrator.t - terminate!(integrator) - end + # if the next iteration would push the simulation beyond the end time, set dt accordingly + if integrator.t + integrator.dt > t_end || + isapprox(integrator.t + integrator.dt, t_end) + integrator.dt = t_end - integrator.t + terminate!(integrator) + end - # one time step - integrator.u_tmp .= 0 - for stage in eachindex(alg.c) - t_stage = integrator.t + integrator.dt * alg.c[stage] - integrator.f(integrator.du, integrator.u, prob.p, t_stage) - - a_stage = alg.a[stage] - b_stage_dt = alg.b[stage] * integrator.dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for i in eachindex(integrator.u) - integrator.u_tmp[i] = integrator.du[i] - integrator.u_tmp[i] * a_stage - integrator.u[i] += integrator.u_tmp[i] * b_stage_dt + # one time step + integrator.u_tmp .= 0 + for stage in eachindex(alg.c) + t_stage = integrator.t + integrator.dt * alg.c[stage] + integrator.f(integrator.du, integrator.u, prob.p, t_stage) + + a_stage = alg.a[stage] + b_stage_dt = alg.b[stage] * integrator.dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for i in eachindex(integrator.u) + integrator.u_tmp[i] = integrator.du[i] - + integrator.u_tmp[i] * a_stage + integrator.u[i] += integrator.u_tmp[i] * b_stage_dt + end + end end - end - end - integrator.iter += 1 - integrator.t += integrator.dt - - # handle callbacks - if callbacks isa CallbackSet - for cb in callbacks.discrete_callbacks - if cb.condition(integrator.u, integrator.t, integrator) - cb.affect!(integrator) + integrator.iter += 1 + integrator.t += integrator.dt + + # handle callbacks + if callbacks isa CallbackSet + for cb in callbacks.discrete_callbacks + if cb.condition(integrator.u, integrator.t, integrator) + cb.affect!(integrator) + end + end end - end - end - # respect maximum number of iterations - if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep - @warn "Interrupted. Larger maxiters is needed." 
- terminate!(integrator) + # respect maximum number of iterations + if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep + @warn "Interrupted. Larger maxiters is needed." + terminate!(integrator) + end end - end - return TimeIntegratorSolution((first(prob.tspan), integrator.t), - (prob.u0, integrator.u), - integrator.sol.prob) + return TimeIntegratorSolution((first(prob.tspan), integrator.t), + (prob.u0, integrator.u), + integrator.sol.prob) end # get a cache where the RHS can be stored @@ -194,21 +200,19 @@ u_modified!(integrator::SimpleIntegrator2N, ::Bool) = false # used by adaptive timestepping algorithms in DiffEq function set_proposed_dt!(integrator::SimpleIntegrator2N, dt) - integrator.dt = dt + integrator.dt = dt end # stop the time integration function terminate!(integrator::SimpleIntegrator2N) - integrator.finalstep = true - empty!(integrator.opts.tstops) + integrator.finalstep = true + empty!(integrator.opts.tstops) end # used for AMR function Base.resize!(integrator::SimpleIntegrator2N, new_size) - resize!(integrator.u, new_size) - resize!(integrator.du, new_size) - resize!(integrator.u_tmp, new_size) + resize!(integrator.u, new_size) + resize!(integrator.du, new_size) + resize!(integrator.u_tmp, new_size) end - - end # @muladd diff --git a/src/time_integration/methods_3Sstar.jl b/src/time_integration/methods_3Sstar.jl index 15299b9e197..03232c04122 100644 --- a/src/time_integration/methods_3Sstar.jl +++ b/src/time_integration/methods_3Sstar.jl @@ -3,12 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Abstract base type for time integration schemes of storage class `3S*` abstract type SimpleAlgorithm3Sstar end - """ HypDiffN3Erk3Sstar52() @@ -16,26 +15,37 @@ Five stage, second-order accurate explicit Runge-Kutta scheme with stability reg the hyperbolic diffusion equation with LLF flux and polynomials of degree polydeg=3. 
""" struct HypDiffN3Erk3Sstar52 <: SimpleAlgorithm3Sstar - gamma1::SVector{5, Float64} - gamma2::SVector{5, Float64} - gamma3::SVector{5, Float64} - beta::SVector{5, Float64} - delta::SVector{5, Float64} - c::SVector{5, Float64} - - function HypDiffN3Erk3Sstar52() - gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, 1.0385212774098265E+00, 3.6859755007388034E-01, -6.3350615190506088E-01) - gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, -2.7595818152587825E-02, 9.1271323651988631E-02, 6.8495995159465062E-01) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 4.1301005663300466E-01, -5.4537881202277507E-03) - beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, 3.7561630338850771E-01, 2.9356700007428856E-02, 2.5205285143494666E-01) - delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, 2.6579275844515687E-01, 9.9687218193685878E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, 1.0221535725056414E+00, 1.4280257701954349E+00, 7.1581334196229851E-01) - - new(gamma1, gamma2, gamma3, beta, delta, c) - end + gamma1::SVector{5, Float64} + gamma2::SVector{5, Float64} + gamma3::SVector{5, Float64} + beta::SVector{5, Float64} + delta::SVector{5, Float64} + c::SVector{5, Float64} + + function HypDiffN3Erk3Sstar52() + gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, + 1.0385212774098265E+00, 3.6859755007388034E-01, + -6.3350615190506088E-01) + gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, + -2.7595818152587825E-02, 9.1271323651988631E-02, + 6.8495995159465062E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 4.1301005663300466E-01, + -5.4537881202277507E-03) + beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, + 3.7561630338850771E-01, 2.9356700007428856E-02, + 2.5205285143494666E-01) + delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, + 2.6579275844515687E-01, 9.9687218193685878E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, + 1.0221535725056414E+00, 1.4280257701954349E+00, + 7.1581334196229851E-01) + + new(gamma1, gamma2, gamma3, beta, delta, c) + end end - """ ParsaniKetchesonDeconinck3Sstar94() @@ -44,26 +54,49 @@ Parsani, Ketcheson, Deconinck (2013) [DOI: 10.1137/120885899](https://doi.org/10.1137/120885899) """ struct ParsaniKetchesonDeconinck3Sstar94 <: SimpleAlgorithm3Sstar - gamma1::SVector{9, Float64} - gamma2::SVector{9, Float64} - gamma3::SVector{9, Float64} - beta::SVector{9, Float64} - delta::SVector{9, Float64} - c::SVector{9, Float64} - - function ParsaniKetchesonDeconinck3Sstar94() - gamma1 = SVector(0.0000000000000000E+00, -4.6556413837561301E+00, -7.7202649689034453E-01, -4.0244202720632174E+00, -2.1296873883702272E-02, -2.4350219407769953E+00, 1.9856336960249132E-02, -2.8107894116913812E-01, 1.6894354373677900E-01) - gamma2 = SVector(1.0000000000000000E+00, 2.4992627683300688E+00, 5.8668202764174726E-01, 1.2051419816240785E+00, 3.4747937498564541E-01, 1.3213458736302766E+00, 3.1196363453264964E-01, 4.3514189245414447E-01, 2.3596980658341213E-01) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 7.6209857891449362E-01, -1.9811817832965520E-01, -6.2289587091629484E-01, -3.7522475499063573E-01, -3.3554373281046146E-01, -4.5609629702116454E-02) - beta = SVector(2.8363432481011769E-01, 9.7364980747486463E-01, 3.3823592364196498E-01, -3.5849518935750763E-01, 
-4.1139587569859462E-03, 1.4279689871485013E+00, 1.8084680519536503E-02, 1.6057708856060501E-01, 2.9522267863254809E-01) - delta = SVector(1.0000000000000000E+00, 1.2629238731608268E+00, 7.5749675232391733E-01, 5.1635907196195419E-01, -2.7463346616574083E-02, -4.3826743572318672E-01, 1.2735870231839268E+00, -6.2947382217730230E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 2.8363432481011769E-01, 5.4840742446661772E-01, 3.6872298094969475E-01, -6.8061183026103156E-01, 3.5185265855105619E-01, 1.6659419385562171E+00, 9.7152778807463247E-01, 9.0515694340066954E-01) - - new(gamma1, gamma2, gamma3, beta, delta, c) - end + gamma1::SVector{9, Float64} + gamma2::SVector{9, Float64} + gamma3::SVector{9, Float64} + beta::SVector{9, Float64} + delta::SVector{9, Float64} + c::SVector{9, Float64} + + function ParsaniKetchesonDeconinck3Sstar94() + gamma1 = SVector(0.0000000000000000E+00, -4.6556413837561301E+00, + -7.7202649689034453E-01, -4.0244202720632174E+00, + -2.1296873883702272E-02, -2.4350219407769953E+00, + 1.9856336960249132E-02, -2.8107894116913812E-01, + 1.6894354373677900E-01) + gamma2 = SVector(1.0000000000000000E+00, 2.4992627683300688E+00, + 5.8668202764174726E-01, 1.2051419816240785E+00, + 3.4747937498564541E-01, 1.3213458736302766E+00, + 3.1196363453264964E-01, 4.3514189245414447E-01, + 2.3596980658341213E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 7.6209857891449362E-01, + -1.9811817832965520E-01, -6.2289587091629484E-01, + -3.7522475499063573E-01, -3.3554373281046146E-01, + -4.5609629702116454E-02) + beta = SVector(2.8363432481011769E-01, 9.7364980747486463E-01, + 3.3823592364196498E-01, -3.5849518935750763E-01, + -4.1139587569859462E-03, 1.4279689871485013E+00, + 1.8084680519536503E-02, 1.6057708856060501E-01, + 2.9522267863254809E-01) + delta = SVector(1.0000000000000000E+00, 1.2629238731608268E+00, + 7.5749675232391733E-01, 5.1635907196195419E-01, + -2.7463346616574083E-02, -4.3826743572318672E-01, + 1.2735870231839268E+00, -6.2947382217730230E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 2.8363432481011769E-01, + 5.4840742446661772E-01, 3.6872298094969475E-01, + -6.8061183026103156E-01, 3.5185265855105619E-01, + 1.6659419385562171E+00, 9.7152778807463247E-01, + 9.0515694340066954E-01) + + new(gamma1, gamma2, gamma3, beta, delta, c) + end end - """ ParsaniKetchesonDeconinck3Sstar32() @@ -72,182 +105,193 @@ Parsani, Ketcheson, Deconinck (2013) [DOI: 10.1137/120885899](https://doi.org/10.1137/120885899) """ struct ParsaniKetchesonDeconinck3Sstar32 <: SimpleAlgorithm3Sstar - gamma1::SVector{3, Float64} - gamma2::SVector{3, Float64} - gamma3::SVector{3, Float64} - beta::SVector{3, Float64} - delta::SVector{3, Float64} - c::SVector{3, Float64} - - function ParsaniKetchesonDeconinck3Sstar32() - gamma1 = SVector(0.0000000000000000E+00, -1.2664395576322218E-01, 1.1426980685848858E+00) - gamma2 = SVector(1.0000000000000000E+00, 6.5427782599406470E-01, -8.2869287683723744E-02) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00) - beta = SVector(7.2366074728360086E-01, 3.4217876502651023E-01, 3.6640216242653251E-01) - delta = SVector(1.0000000000000000E+00, 7.2196567116037724E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 7.2366074728360086E-01, 5.9236433182015646E-01) - - new(gamma1, gamma2, gamma3, beta, delta, c) - end + gamma1::SVector{3, Float64} + gamma2::SVector{3, Float64} + gamma3::SVector{3, Float64} + beta::SVector{3, Float64} 
+ delta::SVector{3, Float64} + c::SVector{3, Float64} + + function ParsaniKetchesonDeconinck3Sstar32() + gamma1 = SVector(0.0000000000000000E+00, -1.2664395576322218E-01, + 1.1426980685848858E+00) + gamma2 = SVector(1.0000000000000000E+00, 6.5427782599406470E-01, + -8.2869287683723744E-02) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00) + beta = SVector(7.2366074728360086E-01, 3.4217876502651023E-01, + 3.6640216242653251E-01) + delta = SVector(1.0000000000000000E+00, 7.2196567116037724E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 7.2366074728360086E-01, + 5.9236433182015646E-01) + + new(gamma1, gamma2, gamma3, beta, delta, c) + end end - mutable struct SimpleIntegrator3SstarOptions{Callback} - callback::Callback # callbacks; used in Trixi.jl - adaptive::Bool # whether the algorithm is adaptive; ignored - dtmax::Float64 # ignored - maxiters::Int # maximal number of time steps - tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored + callback::Callback # callbacks; used in Trixi.jl + adaptive::Bool # whether the algorithm is adaptive; ignored + dtmax::Float64 # ignored + maxiters::Int # maximal number of time steps + tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored end -function SimpleIntegrator3SstarOptions(callback, tspan; maxiters=typemax(Int), kwargs...) - SimpleIntegrator3SstarOptions{typeof(callback)}( - callback, false, Inf, maxiters, [last(tspan)]) +function SimpleIntegrator3SstarOptions(callback, tspan; maxiters = typemax(Int), + kwargs...) + SimpleIntegrator3SstarOptions{typeof(callback)}(callback, false, Inf, maxiters, + [last(tspan)]) end -mutable struct SimpleIntegrator3Sstar{RealT<:Real, uType, Params, Sol, F, Alg, SimpleIntegrator3SstarOptions} - u::uType # - du::uType - u_tmp1::uType - u_tmp2::uType - t::RealT - dt::RealT # current time step - dtcache::RealT # ignored - iter::Int # current number of time step (iteration) - p::Params # will be the semidiscretization from Trixi.jl - sol::Sol # faked - f::F - alg::Alg - opts::SimpleIntegrator3SstarOptions - finalstep::Bool # added for convenience +mutable struct SimpleIntegrator3Sstar{RealT <: Real, uType, Params, Sol, F, Alg, + SimpleIntegrator3SstarOptions} + u::uType # + du::uType + u_tmp1::uType + u_tmp2::uType + t::RealT + dt::RealT # current time step + dtcache::RealT # ignored + iter::Int # current number of time step (iteration) + p::Params # will be the semidiscretization from Trixi.jl + sol::Sol # faked + f::F + alg::Alg + opts::SimpleIntegrator3SstarOptions + finalstep::Bool # added for convenience end # Forward integrator.stats.naccept to integrator.iter (see GitHub PR#771) function Base.getproperty(integrator::SimpleIntegrator3Sstar, field::Symbol) - if field === :stats - return (naccept = getfield(integrator, :iter),) - end - # general fallback - return getfield(integrator, field) + if field === :stats + return (naccept = getfield(integrator, :iter),) + end + # general fallback + return getfield(integrator, field) end # Fakes `solve`: https://diffeq.sciml.ai/v6.8/basics/overview/#Solving-the-Problems-1 function solve(ode::ODEProblem, alg::T; - dt, callback=nothing, kwargs...) 
where {T<:SimpleAlgorithm3Sstar} - u = copy(ode.u0) - du = similar(u) - u_tmp1 = similar(u) - u_tmp2 = similar(u) - t = first(ode.tspan) - iter = 0 - integrator = SimpleIntegrator3Sstar(u, du, u_tmp1, u_tmp2, t, dt, zero(dt), iter, ode.p, - (prob=ode,), ode.f, alg, - SimpleIntegrator3SstarOptions(callback, ode.tspan; kwargs...), false) - - # initialize callbacks - if callback isa CallbackSet - for cb in callback.continuous_callbacks - error("unsupported") - end - for cb in callback.discrete_callbacks - cb.initialize(cb, integrator.u, integrator.t, integrator) + dt, callback = nothing, kwargs...) where {T <: SimpleAlgorithm3Sstar} + u = copy(ode.u0) + du = similar(u) + u_tmp1 = similar(u) + u_tmp2 = similar(u) + t = first(ode.tspan) + iter = 0 + integrator = SimpleIntegrator3Sstar(u, du, u_tmp1, u_tmp2, t, dt, zero(dt), iter, + ode.p, + (prob = ode,), ode.f, alg, + SimpleIntegrator3SstarOptions(callback, + ode.tspan; + kwargs...), false) + + # initialize callbacks + if callback isa CallbackSet + for cb in callback.continuous_callbacks + error("unsupported") + end + for cb in callback.discrete_callbacks + cb.initialize(cb, integrator.u, integrator.t, integrator) + end + elseif !isnothing(callback) + error("unsupported") end - elseif !isnothing(callback) - error("unsupported") - end - solve!(integrator) + solve!(integrator) end function solve!(integrator::SimpleIntegrator3Sstar) - @unpack prob = integrator.sol - @unpack alg = integrator - t_end = last(prob.tspan) - callbacks = integrator.opts.callback - - integrator.finalstep = false - @trixi_timeit timer() "main loop" while !integrator.finalstep - if isnan(integrator.dt) - error("time step size `dt` is NaN") - end + @unpack prob = integrator.sol + @unpack alg = integrator + t_end = last(prob.tspan) + callbacks = integrator.opts.callback + + integrator.finalstep = false + @trixi_timeit timer() "main loop" while !integrator.finalstep + if isnan(integrator.dt) + error("time step size `dt` is NaN") + end - # if the next iteration would push the simulation beyond the end time, set dt accordingly - if integrator.t + integrator.dt > t_end || isapprox(integrator.t + integrator.dt, t_end) - integrator.dt = t_end - integrator.t - terminate!(integrator) - end + # if the next iteration would push the simulation beyond the end time, set dt accordingly + if integrator.t + integrator.dt > t_end || + isapprox(integrator.t + integrator.dt, t_end) + integrator.dt = t_end - integrator.t + terminate!(integrator) + end - # one time step - integrator.u_tmp1 .= zero(eltype(integrator.u_tmp1)) - integrator.u_tmp2 .= integrator.u - for stage in eachindex(alg.c) - t_stage = integrator.t + integrator.dt * alg.c[stage] - prob.f(integrator.du, integrator.u, prob.p, t_stage) - - delta_stage = alg.delta[stage] - gamma1_stage = alg.gamma1[stage] - gamma2_stage = alg.gamma2[stage] - gamma3_stage = alg.gamma3[stage] - beta_stage_dt = alg.beta[stage] * integrator.dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for i in eachindex(integrator.u) - integrator.u_tmp1[i] += delta_stage * integrator.u[i] - integrator.u[i] = (gamma1_stage * integrator.u[i] + - gamma2_stage * integrator.u_tmp1[i] + - gamma3_stage * integrator.u_tmp2[i] + - beta_stage_dt * integrator.du[i]) + # one time step + integrator.u_tmp1 .= zero(eltype(integrator.u_tmp1)) + integrator.u_tmp2 .= integrator.u + for stage in eachindex(alg.c) + t_stage = integrator.t + integrator.dt * alg.c[stage] + prob.f(integrator.du, integrator.u, prob.p, t_stage) + + delta_stage = alg.delta[stage] + gamma1_stage 
= alg.gamma1[stage] + gamma2_stage = alg.gamma2[stage] + gamma3_stage = alg.gamma3[stage] + beta_stage_dt = alg.beta[stage] * integrator.dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for i in eachindex(integrator.u) + integrator.u_tmp1[i] += delta_stage * integrator.u[i] + integrator.u[i] = (gamma1_stage * integrator.u[i] + + gamma2_stage * integrator.u_tmp1[i] + + gamma3_stage * integrator.u_tmp2[i] + + beta_stage_dt * integrator.du[i]) + end + end end - end - end - integrator.iter += 1 - integrator.t += integrator.dt - - # handle callbacks - if callbacks isa CallbackSet - for cb in callbacks.discrete_callbacks - if cb.condition(integrator.u, integrator.t, integrator) - cb.affect!(integrator) + integrator.iter += 1 + integrator.t += integrator.dt + + # handle callbacks + if callbacks isa CallbackSet + for cb in callbacks.discrete_callbacks + if cb.condition(integrator.u, integrator.t, integrator) + cb.affect!(integrator) + end + end end - end - end - # respect maximum number of iterations - if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep - @warn "Interrupted. Larger maxiters is needed." - terminate!(integrator) + # respect maximum number of iterations + if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep + @warn "Interrupted. Larger maxiters is needed." + terminate!(integrator) + end end - end - return TimeIntegratorSolution((first(prob.tspan), integrator.t), - (prob.u0, integrator.u), - integrator.sol.prob) + return TimeIntegratorSolution((first(prob.tspan), integrator.t), + (prob.u0, integrator.u), + integrator.sol.prob) end # get a cache where the RHS can be stored get_du(integrator::SimpleIntegrator3Sstar) = integrator.du -get_tmp_cache(integrator::SimpleIntegrator3Sstar) = (integrator.u_tmp1, integrator.u_tmp2) +function get_tmp_cache(integrator::SimpleIntegrator3Sstar) + (integrator.u_tmp1, integrator.u_tmp2) +end # some algorithms from DiffEq like FSAL-ones need to be informed when a callback has modified u u_modified!(integrator::SimpleIntegrator3Sstar, ::Bool) = false # used by adaptive timestepping algorithms in DiffEq function set_proposed_dt!(integrator::SimpleIntegrator3Sstar, dt) - integrator.dt = dt + integrator.dt = dt end # stop the time integration function terminate!(integrator::SimpleIntegrator3Sstar) - integrator.finalstep = true - empty!(integrator.opts.tstops) + integrator.finalstep = true + empty!(integrator.opts.tstops) end # used for AMR function Base.resize!(integrator::SimpleIntegrator3Sstar, new_size) - resize!(integrator.u, new_size) - resize!(integrator.du, new_size) - resize!(integrator.u_tmp1, new_size) - resize!(integrator.u_tmp2, new_size) + resize!(integrator.u, new_size) + resize!(integrator.du, new_size) + resize!(integrator.u_tmp1, new_size) + resize!(integrator.u_tmp2, new_size) end - - end # @muladd diff --git a/src/time_integration/time_integration.jl b/src/time_integration/time_integration.jl index a661c0b25ee..539e00ff700 100644 --- a/src/time_integration/time_integration.jl +++ b/src/time_integration/time_integration.jl @@ -3,18 +3,16 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Wrapper type for solutions from Trixi.jl's own time integrators, partially mimicking # SciMLBase.ODESolution struct TimeIntegratorSolution{tType, uType, P} - t::tType - u::uType - prob::P + t::tType + u::uType + prob::P end include("methods_2N.jl") include("methods_3Sstar.jl") - - end # @muladd diff --git a/src/visualization/recipes_makie.jl b/src/visualization/recipes_makie.jl deleted file mode 100644 index 3fd3850c34d..00000000000 --- a/src/visualization/recipes_makie.jl +++ /dev/null @@ -1,382 +0,0 @@ -# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). -# Since these FMAs can increase the performance of many numerical algorithms, -# we need to opt-in explicitly. -# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. -@muladd begin - -# First some utilities -# Given a reference plotting triangulation, this function generates a plotting triangulation for -# the entire global mesh. The output can be plotted using `Makie.mesh`. -function global_plotting_triangulation_makie(pds::PlotDataSeries{<:PlotData2DTriangulated}; - set_z_coordinate_zero = false) - @unpack variable_id = pds - pd = pds.plot_data - @unpack x, y, data, t = pd - - makie_triangles = Makie.to_triangles(t) - - # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. - # Note: Float32 is required by GeometryBasics - num_plotting_nodes, num_elements = size(x) - trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) - coordinates = zeros(Float32, num_plotting_nodes, 3) - for element in Base.OneTo(num_elements) - for i in Base.OneTo(num_plotting_nodes) - coordinates[i, 1] = x[i, element] - coordinates[i, 2] = y[i, element] - if set_z_coordinate_zero == false - coordinates[i, 3] = data[i, element][variable_id] - end - end - trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), makie_triangles) - end - plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh - return plotting_mesh -end - -# Returns a list of `Makie.Point`s which can be used to plot the mesh, or a solution "wireframe" -# (e.g., a plot of the mesh lines but with the z-coordinate equal to the value of the solution). -function convert_PlotData2D_to_mesh_Points(pds::PlotDataSeries{<:PlotData2DTriangulated}; - set_z_coordinate_zero = false) - @unpack variable_id = pds - pd = pds.plot_data - @unpack x_face, y_face, face_data = pd - - if set_z_coordinate_zero - # plot 2d surface by setting z coordinate to zero. - # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. - sol_f = zeros(eltype(first(x_face)), size(x_face)) - else - sol_f = StructArrays.component(face_data, variable_id) - end - - # This line separates solution lines on each edge by NaNs to ensure that they are rendered - # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix - # whose columns correspond to different elements. We add NaN separators by appending a row of - # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up - # plotting. - xyz_wireframe = GeometryBasics.Point.(map(x->vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face, sol_f))...) - - return xyz_wireframe -end - -# Creates a GeometryBasics triangulation for the visualization of a ScalarData2D plot object. 
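The NaN-separator idiom used by `convert_PlotData2D_to_mesh_Points` above (and by its `ScalarData` variant that follows) can be sketched in isolation. The coordinate matrix below is made up, with columns playing the role of elements; only `vec`, `vcat`, and `fill` from Base are needed:

# Append a row of NaNs to each matrix, then flatten: plotting libraries
# break polylines at NaN, so each element's lines are rendered separately.
separate_by_nans(x) = vec(vcat(x, fill(NaN, 1, size(x, 2))))

x_face = [0.0 1.0;   # two edges (columns), two points per edge (rows)
          0.5 1.5]
separate_by_nans(x_face)  # [0.0, 0.5, NaN, 1.0, 1.5, NaN]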
-function global_plotting_triangulation_makie(pd::PlotData2DTriangulated{<:ScalarData}; - set_z_coordinate_zero = false) - @unpack x, y, data, t = pd - - makie_triangles = Makie.to_triangles(t) - - # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. - # Note: Float32 is required by GeometryBasics - num_plotting_nodes, num_elements = size(x) - trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) - coordinates = zeros(Float32, num_plotting_nodes, 3) - for element in Base.OneTo(num_elements) - for i in Base.OneTo(num_plotting_nodes) - coordinates[i, 1] = x[i, element] - coordinates[i, 2] = y[i, element] - if set_z_coordinate_zero == false - coordinates[i, 3] = data.data[i, element] - end - end - trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), makie_triangles) - end - plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh - return plotting_mesh -end - -# Returns a list of `GeometryBasics.Point`s which can be used to plot the mesh, or a solution "wireframe" -# (e.g., a plot of the mesh lines but with the z-coordinate equal to the value of the solution). -function convert_PlotData2D_to_mesh_Points(pd::PlotData2DTriangulated{<:ScalarData}; - set_z_coordinate_zero = false) - @unpack x_face, y_face, face_data = pd - - if set_z_coordinate_zero - # plot 2d surface by setting z coordinate to zero. - # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. - sol_f = zeros(eltype(first(x_face)), size(x_face)) - else - sol_f = face_data - end - - # This line separates solution lines on each edge by NaNs to ensure that they are rendered - # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix - # whose columns correspond to different elements. We add NaN separators by appending a row of - # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up - # plotting. - xyz_wireframe = GeometryBasics.Point.(map(x->vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face, sol_f))...) - - return xyz_wireframe -end - - -# We set the Makie default colormap to match Plots.jl, which uses `:inferno` by default. -default_Makie_colormap() = :inferno - -# convenience struct for editing Makie plots after they're created. -struct FigureAndAxes{Axes} - fig::Makie.Figure - axes::Axes -end - -# for "quiet" return arguments to Makie.plot(::TrixiODESolution) and -# Makie.plot(::PlotData2DTriangulated) -Base.show(io::IO, fa::FigureAndAxes) = nothing - -# allows for returning fig, axes = Makie.plot(...) -function Base.iterate(fa::FigureAndAxes, state=1) - if state == 1 - return (fa.fig, 2) - elseif state == 2 - return (fa.axes, 3) - else - return nothing - end -end - -""" - iplot(u, mesh::UnstructuredMesh2D, equations, solver, cache; - plot_mesh=true, show_axis=false, colormap=default_Makie_colormap(), - variable_to_plot_in=1) - -Creates an interactive surface plot of the solution and mesh for an `UnstructuredMesh2D` type. - -Keywords: -- variable_to_plot_in: variable to show by default - -!!! warning "Experimental implementation" - This is an experimental feature and may change in future releases. -""" - -# Enables `iplot(PlotData2D(sol))`. -function iplot(pd::PlotData2DTriangulated; - plot_mesh=true, show_axis=false, colormap=default_Makie_colormap(), - variable_to_plot_in=1) - - @unpack variable_names = pd - - # Initialize a Makie figure that we'll add the solution and toggle switches to. 
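The `Base.iterate` definition on `FigureAndAxes` above is what makes the destructuring `fig, axes = Makie.plot(...)` work, since Julia lowers multiple assignment to successive `iterate` calls. The same pattern for a generic two-field wrapper, as a standalone sketch (`TwoFields` is a made-up name):

struct TwoFields{A, B}
    first::A
    second::B
end

function Base.iterate(t::TwoFields, state = 1)
    state == 1 && return (t.first, 2)
    state == 2 && return (t.second, 3)
    return nothing
end

fig, axes = TwoFields("figure", "axes")  # destructuring calls `iterate` twice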
- fig = Makie.Figure() - - # Set up options for the drop-down menu - menu_options = [zip(variable_names, 1:length(variable_names))...] - menu = Makie.Menu(fig, options=menu_options) - - # Initialize toggle switches for viewing the mesh - toggle_solution_mesh = Makie.Toggle(fig, active=plot_mesh) - toggle_mesh = Makie.Toggle(fig, active=plot_mesh) - - # Add dropdown menu and toggle switches to the left side of the figure. - fig[1, 1] = Makie.vgrid!( - Makie.Label(fig, "Solution field", width=nothing), menu, - Makie.Label(fig, "Solution mesh visible"), toggle_solution_mesh, - Makie.Label(fig, "Mesh visible"), toggle_mesh; - tellheight=false, width = 200 - ) - - # Create a zoomable interactive axis object on top of which to plot the solution. - ax = Makie.LScene(fig[1, 2], scenekw=(show_axis=show_axis,)) - - # Initialize the dropdown menu to `variable_to_plot_in` - # Since menu.selection is an Observable type, we need to dereference it using `[]` to set. - menu.selection[] = variable_to_plot_in - menu.i_selected[] = variable_to_plot_in - - # Since `variable_to_plot` is an Observable, these lines are re-run whenever `variable_to_plot[]` - # is updated from the drop-down menu. - plotting_mesh = Makie.@lift(global_plotting_triangulation_makie(getindex(pd, variable_names[$(menu.selection)]))) - solution_z = Makie.@lift(getindex.($plotting_mesh.position, 3)) - - # Plot the actual solution. - Makie.mesh!(ax, plotting_mesh; color=solution_z, colormap) - - # Create a mesh overlay by plotting a mesh both on top of and below the solution contours. - wire_points = Makie.@lift(convert_PlotData2D_to_mesh_Points(getindex(pd, variable_names[$(menu.selection)]))) - wire_mesh_top = Makie.lines!(ax, wire_points, color=:white) - wire_mesh_bottom = Makie.lines!(ax, wire_points, color=:white) - Makie.translate!(wire_mesh_top, 0, 0, 1e-3) - Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) - - # This draws flat mesh lines below the solution. - function compute_z_offset(solution_z) - zmin = minimum(solution_z) - zrange = (x->x[2]-x[1])(extrema(solution_z)) - return zmin - .25*zrange - end - z_offset = Makie.@lift(compute_z_offset($solution_z)) - get_flat_points(wire_points, z_offset) = [Makie.Point(point.data[1:2]..., z_offset) for point in wire_points] - flat_wire_points = Makie.@lift get_flat_points($wire_points, $z_offset) - wire_mesh_flat = Makie.lines!(ax, flat_wire_points, color=:black) - - # create a small variation in the extrema to avoid the Makie `range_step` cannot be zero error. - # see https://github.com/MakieOrg/Makie.jl/issues/931 for more details. - # the colorbar range is perturbed by 1e-5 * the magnitude of the solution. - function scaled_extrema(x) - ex = extrema(x) - if ex[2] ≈ ex[1] # if solution is close to constant, perturb colorbar - return ex .+ 1e-5 .* maximum(abs.(ex)) .* (-1, 1) - else - return ex - end - end - - # Resets the colorbar each time the solution changes. - Makie.Colorbar(fig[1, 3], limits = Makie.@lift(scaled_extrema($solution_z)), colormap=colormap) - - # This syncs the toggle buttons to the mesh plots. - Makie.connect!(wire_mesh_top.visible, toggle_solution_mesh.active) - Makie.connect!(wire_mesh_bottom.visible, toggle_solution_mesh.active) - Makie.connect!(wire_mesh_flat.visible, toggle_mesh.active) - - # On OSX, shift-command-4 for screenshots triggers a constant "up-zoom". - # To avoid this, we remap up-zoom to the right shift button instead. 
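The interactivity in `iplot` above rests on Makie's `Observable`/`@lift` mechanism: every `Makie.@lift(...)` expression re-runs whenever a dereferenced observable such as `$(menu.selection)` changes, which is how the drop-down menu swaps the plotted variable. A minimal sketch of that pattern, assuming GLMakie as the backend and a made-up `data_for` function standing in for the triangulation code:

using GLMakie

selection = Observable(1)        # in `iplot`, this role is played by `menu.selection`
data_for(i) = rand(10) .+ i      # hypothetical data source
ys = @lift data_for($selection)  # re-evaluated whenever `selection` changes

fig, ax, plt = lines(ys)
selection[] = 2                  # the line plot updates in place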
- Makie.cameracontrols(ax.scene).attributes[:up_key][] = Makie.Keyboard.right_shift - - # typing this pulls up the figure (similar to display(plot!()) in Plots.jl) - fig -end - -function iplot(u, mesh, equations, solver, cache; - solution_variables=nothing, nvisnodes=2*nnodes(solver), kwargs...) - @assert ndims(mesh) == 2 - - pd = PlotData2DTriangulated(u, mesh, equations, solver, cache; - solution_variables=solution_variables, nvisnodes=nvisnodes) - - iplot(pd; kwargs...) -end - -# redirect `iplot(sol)` to dispatchable `iplot` signature. -iplot(sol::TrixiODESolution; kwargs...) = iplot(sol.u[end], sol.prob.p; kwargs...) -iplot(u, semi; kwargs...) = iplot(wrap_array_native(u, semi), mesh_equations_solver_cache(semi)...; kwargs...) - -# Interactive visualization of user-defined ScalarData. -function iplot(pd::PlotData2DTriangulated{<:ScalarData}; - show_axis=false, colormap=default_Makie_colormap(), plot_mesh=false) - fig = Makie.Figure() - - # Create a zoomable interactive axis object on top of which to plot the solution. - ax = Makie.LScene(fig[1, 1], scenekw=(show_axis=show_axis,)) - - # plot the user-defined ScalarData - fig_axis_plt = iplot!(FigureAndAxes(fig, ax), pd; colormap=colormap, plot_mesh=plot_mesh) - - fig - return fig_axis_plt -end - -function iplot!(fig_axis::Union{FigureAndAxes, Makie.FigureAxisPlot}, - pd::PlotData2DTriangulated{<:ScalarData}; - colormap=default_Makie_colormap(), plot_mesh=false) - - # destructure first two fields of either FigureAndAxes or Makie.FigureAxisPlot - fig, ax = fig_axis - - # create triangulation of the scalar data to plot - plotting_mesh = global_plotting_triangulation_makie(pd) - solution_z = getindex.(plotting_mesh.position, 3) - plt = Makie.mesh!(ax, plotting_mesh; color=solution_z, colormap) - - if plot_mesh - wire_points = convert_PlotData2D_to_mesh_Points(pd) - wire_mesh_top = Makie.lines!(ax, wire_points, color=:white) - wire_mesh_bottom = Makie.lines!(ax, wire_points, color=:white) - Makie.translate!(wire_mesh_top, 0, 0, 1e-3) - Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) - end - - # Add a colorbar to the rightmost part of the layout - Makie.Colorbar(fig[1, end+1], plt) - - fig - return Makie.FigureAxisPlot(fig, ax, plt) -end - -# ================== new Makie plot recipes ==================== - -# This initializes a Makie recipe, which creates a new type definition which Makie uses to create -# custom `trixiheatmap` plots. 
See also https://makie.juliaplots.org/stable/recipes.html -@Makie.recipe(TrixiHeatmap, plot_data_series) do scene - Makie.Theme( - colormap = default_Makie_colormap() - ) -end - -function Makie.plot!(myplot::TrixiHeatmap) - pds = myplot[:plot_data_series][] - - plotting_mesh = global_plotting_triangulation_makie(pds; set_z_coordinate_zero = true) - - @unpack variable_id = pds - pd = pds.plot_data - solution_z = vec(StructArrays.component(pd.data, variable_id)) - Makie.mesh!(myplot, plotting_mesh, color=solution_z, shading=false, colormap=myplot[:colormap]) - myplot.colorrange = extrema(solution_z) - - # Makie hides keyword arguments within `myplot`; see also - # https://github.com/JuliaPlots/Makie.jl/issues/837#issuecomment-845985070 - plot_mesh = if haskey(myplot, :plot_mesh) - myplot.plot_mesh[] - else - true # default to plotting the mesh - end - - if plot_mesh - xyz_wireframe = convert_PlotData2D_to_mesh_Points(pds; set_z_coordinate_zero = true) - Makie.lines!(myplot, xyz_wireframe, color=:lightgrey) - end - - myplot -end - -# redirects Makie.plot(pd::PlotDataSeries) to custom recipe TrixiHeatmap(pd) -Makie.plottype(::Trixi.PlotDataSeries{<:Trixi.PlotData2DTriangulated}) = TrixiHeatmap - -# Makie does not yet support layouts in its plot recipes, so we overload `Makie.plot` directly. -function Makie.plot(sol::TrixiODESolution; - plot_mesh=false, solution_variables=nothing, colormap=default_Makie_colormap()) - return Makie.plot(PlotData2DTriangulated(sol; solution_variables); plot_mesh, colormap) -end - -function Makie.plot(pd::PlotData2DTriangulated, fig=Makie.Figure(); - plot_mesh=false, colormap=default_Makie_colormap()) - figAxes = Makie.plot!(fig, pd; plot_mesh, colormap) - display(figAxes.fig) - return figAxes -end - -function Makie.plot!(fig, pd::PlotData2DTriangulated; - plot_mesh=false, colormap=default_Makie_colormap()) - # Create layout that is as square as possible, when there are more than 3 subplots. - # This is done with a preference for more columns than rows if not. - if length(pd) <= 3 - cols = length(pd) - rows = 1 - else - cols = ceil(Int, sqrt(length(pd))) - rows = cld(length(pd), cols) - end - - axes = [Makie.Axis(fig[i,j], xlabel="x", ylabel="y") for j in 1:rows, i in 1:cols] - row_list, col_list = [i for j in 1:rows, i in 1:cols], [j for j in 1:rows, i in 1:cols] - - for (variable_to_plot, (variable_name, pds)) in enumerate(pd) - ax = axes[variable_to_plot] - plt = trixiheatmap!(ax, pds; plot_mesh, colormap) - - row = row_list[variable_to_plot] - col = col_list[variable_to_plot] - Makie.Colorbar(fig[row, col][1,2], plt) - - ax.aspect = Makie.DataAspect() # equal aspect ratio - ax.title = variable_name - Makie.xlims!(ax, extrema(pd.x)) - Makie.ylims!(ax, extrema(pd.y)) - end - - return FigureAndAxes(fig, axes) -end - - -end # @muladd diff --git a/src/visualization/recipes_plots.jl b/src/visualization/recipes_plots.jl index db621199947..d15f7e542e1 100644 --- a/src/visualization/recipes_plots.jl +++ b/src/visualization/recipes_plots.jl @@ -3,273 +3,276 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # Visualize a single variable in a 2D plot (default: heatmap) # # Note: This is an experimental feature and may be changed in future releases without notice. 
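The recipes in this file all follow RecipesBase.jl's attribute syntax: `-->` sets a default that the user can still override from the `plot` call, `:=` forces an attribute, and the final expression of the recipe body is the data handed to Plots.jl. A minimal self-contained sketch (the `MyData` type is made up):

using RecipesBase

struct MyData
    x::Vector{Float64}
    y::Vector{Float64}
end

RecipesBase.@recipe function f(d::MyData)
    seriestype --> :scatter  # default; `plot(d; seriestype = :line)` overrides it
    legend := :none          # forced; the user cannot override this one
    d.x, d.y                 # data to plot
end

With Plots.jl loaded, `plot(MyData([1.0, 2.0], [3.0, 4.0]))` dispatches through this recipe.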
RecipesBase.@recipe function f(pds::PlotDataSeries{<:AbstractPlotData{2}}) - @unpack plot_data, variable_id = pds - @unpack x, y, data, variable_names, orientation_x, orientation_y = plot_data - - # Set geometric properties - xlims --> (x[begin], x[end]) - ylims --> (y[begin], y[end]) - aspect_ratio --> :equal - - # Set annotation properties - legend --> :none - title --> variable_names[variable_id] - colorbar --> :true - xguide --> _get_guide(orientation_x) - yguide --> _get_guide(orientation_y) - - # Set series properties - seriestype --> :heatmap - - # Return data for plotting - x, y, data[variable_id] + @unpack plot_data, variable_id = pds + @unpack x, y, data, variable_names, orientation_x, orientation_y = plot_data + + # Set geometric properties + xlims --> (x[begin], x[end]) + ylims --> (y[begin], y[end]) + aspect_ratio --> :equal + + # Set annotation properties + legend --> :none + title --> variable_names[variable_id] + colorbar --> :true + xguide --> _get_guide(orientation_x) + yguide --> _get_guide(orientation_y) + + # Set series properties + seriestype --> :heatmap + + # Return data for plotting + x, y, data[variable_id] end # Visualize the mesh in a 2D plot # # Note: This is an experimental feature and may be changed in future releases without notice. RecipesBase.@recipe function f(pm::PlotMesh{<:AbstractPlotData{2}}) - @unpack plot_data = pm - @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data - - # Set geometric and annotation properties - xlims --> (x[begin], x[end]) - ylims --> (y[begin], y[end]) - aspect_ratio --> :equal - legend --> :none - grid --> false - - # Set series properties - seriestype --> :path - linecolor --> :grey - linewidth --> 1 - - # Return data for plotting - mesh_vertices_x, mesh_vertices_y + @unpack plot_data = pm + @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data + + # Set geometric and annotation properties + xlims --> (x[begin], x[end]) + ylims --> (y[begin], y[end]) + aspect_ratio --> :equal + legend --> :none + grid --> false + + # Set series properties + seriestype --> :path + linecolor --> :grey + linewidth --> 1 + + # Return data for plotting + mesh_vertices_x, mesh_vertices_y end - # Visualize the mesh in a 2D plot # # Note: This is an experimental feature and may be changed in future releases without notice. 
-RecipesBase.@recipe function f(pm::PlotMesh{<:PlotData2DCartesian{<:Any, <:AbstractVector{<:AbstractVector}}}) - @unpack plot_data = pm - @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data - - # Set geometric and annotation properties - xlims --> (minimum(x), maximum(x)) - ylims --> (minimum(y), maximum(y)) - aspect_ratio --> :equal - legend --> :none - grid --> false - - # Set series properties - seriestype --> :path - linecolor --> :grey - linewidth --> 1 - - # Return data for plotting - mesh_vertices_x, mesh_vertices_y +RecipesBase.@recipe function f(pm::PlotMesh{ + <:PlotData2DCartesian{<:Any, + <:AbstractVector{ + <:AbstractVector + }}}) + @unpack plot_data = pm + @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data + + # Set geometric and annotation properties + xlims --> (minimum(x), maximum(x)) + ylims --> (minimum(y), maximum(y)) + aspect_ratio --> :equal + legend --> :none + grid --> false + + # Set series properties + seriestype --> :path + linecolor --> :grey + linewidth --> 1 + + # Return data for plotting + mesh_vertices_x, mesh_vertices_y end - # Plot all available variables at once for convenience # # Note: This is an experimental feature and may be changed in future releases without notice. RecipesBase.@recipe function f(pd::AbstractPlotData) - # Create layout that is as square as possible, when there are more than 3 subplots. - # This is done with a preference for more columns than rows if not. - - if length(pd) <= 3 - cols = length(pd) - rows = 1 - else - cols = ceil(Int, sqrt(length(pd))) - rows = ceil(Int, length(pd)/cols) - end - - layout := (rows, cols) - - # Plot all existing variables - for (i, (variable_name, series)) in enumerate(pd) - RecipesBase.@series begin - subplot := i - series + # Create layout that is as square as possible, when there are more than 3 subplots. + # This is done with a preference for more columns than rows if not. + + if length(pd) <= 3 + cols = length(pd) + rows = 1 + else + cols = ceil(Int, sqrt(length(pd))) + rows = ceil(Int, length(pd) / cols) + end + + layout := (rows, cols) + + # Plot all existing variables + for (i, (variable_name, series)) in enumerate(pd) + RecipesBase.@series begin + subplot := i + series + end end - end - - # Fill remaining subplots with empty plot - for i in (length(pd)+1):(rows*cols) - RecipesBase.@series begin - subplot := i - axis := false - ticks := false - legend := false - [], [] + + # Fill remaining subplots with empty plot + for i in (length(pd) + 1):(rows * cols) + RecipesBase.@series begin + subplot := i + axis := false + ticks := false + legend := false + [], [] + end end - end end # Plot a single variable. RecipesBase.@recipe function f(pds::PlotDataSeries{<:AbstractPlotData{1}}) - @unpack plot_data, variable_id = pds - @unpack x, data, variable_names, orientation_x = plot_data + @unpack plot_data, variable_id = pds + @unpack x, data, variable_names, orientation_x = plot_data - # Set geometric properties - xlims --> (x[begin], x[end]) + # Set geometric properties + xlims --> (x[begin], x[end]) - # Set annotation properties - legend --> :none - title --> variable_names[variable_id] - xguide --> _get_guide(orientation_x) + # Set annotation properties + legend --> :none + title --> variable_names[variable_id] + xguide --> _get_guide(orientation_x) - # Return data for plotting - x, data[:, variable_id] + # Return data for plotting + x, data[:, variable_id] end # Plot the mesh as vertical lines from a PlotMesh object. 
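The layout rule in the `AbstractPlotData` recipe above ("as square as possible, preferring more columns than rows") is easiest to see as a pure function; the same arithmetic appears in the deleted `recipes_makie.jl` above, written with `cld`:

function subplot_layout(n)
    n <= 3 && return (1, n)     # up to three variables fit in a single row
    cols = ceil(Int, sqrt(n))
    rows = ceil(Int, n / cols)  # equivalent to cld(n, cols) for positive n
    return (rows, cols)
end

subplot_layout(5)  # (2, 3): five panels in a 2x3 grid, the last slot stays empty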
 RecipesBase.@recipe function f(pm::PlotMesh{<:AbstractPlotData{1}})
-  @unpack plot_data = pm
-  @unpack x, mesh_vertices_x = plot_data
+    @unpack plot_data = pm
+    @unpack x, mesh_vertices_x = plot_data
 
-  # Set geometric and annotation properties
-  xlims --> (x[begin], x[end])
-  legend --> :none
+    # Set geometric and annotation properties
+    xlims --> (x[begin], x[end])
+    legend --> :none
 
-  # Set series properties
-  seriestype --> :vline
-  linecolor --> :grey
-  linewidth --> 1
+    # Set series properties
+    seriestype --> :vline
+    linecolor --> :grey
+    linewidth --> 1
 
-  # Return data for plotting
-  mesh_vertices_x
+    # Return data for plotting
+    mesh_vertices_x
 end
 
-
 # Create a plot directly from a TrixiODESolution for convenience
 # The plot is created by a PlotData1D or PlotData2D object.
 #
 # Note: This is an experimental feature and may be changed in future releases without notice.
 RecipesBase.@recipe function f(sol::TrixiODESolution)
-  # Redirect everything to the recipes below
-  return sol.u[end], sol.prob.p
+    # Redirect everything to the recipes below
+    return sol.u[end], sol.prob.p
 end
 
 # Recipe for general semidiscretizations
 # Note: If you change the default values here, you need to also change them in the PlotData1D or PlotData2D
 # constructor.
 RecipesBase.@recipe function f(u, semi::AbstractSemidiscretization;
-                               solution_variables=nothing)
-  if ndims(semi) == 1
-    return PlotData1D(u, semi; solution_variables=solution_variables)
-  else
-    return PlotData2D(u, semi; solution_variables=solution_variables)
-  end
+                               solution_variables = nothing)
+    if ndims(semi) == 1
+        return PlotData1D(u, semi; solution_variables = solution_variables)
+    else
+        return PlotData2D(u, semi; solution_variables = solution_variables)
+    end
 end
 
 # Recipe specifically for TreeMesh-type solutions
 # Note: If you change the default values here, you need to also change them in the PlotData1D or PlotData2D
 # constructor.
 RecipesBase.@recipe function f(u, semi::SemidiscretizationHyperbolic{<:TreeMesh};
-                               solution_variables=nothing,
-                               grid_lines=true, max_supported_level=11, nvisnodes=nothing, slice=:xy,
-                               point=(0.0, 0.0, 0.0), curve=nothing)
+                               solution_variables = nothing,
+                               grid_lines = true, max_supported_level = 11,
+                               nvisnodes = nothing, slice = :xy,
+                               point = (0.0, 0.0, 0.0), curve = nothing)
+    # Create a PlotData1D or PlotData2D object depending on the dimension.
+    if ndims(semi) == 1
+        return PlotData1D(u, semi; solution_variables, nvisnodes, slice, point, curve)
+    else
+        return PlotData2D(u, semi;
+                          solution_variables, grid_lines, max_supported_level,
+                          nvisnodes, slice, point)
+    end
 end
 
 # Series recipe for PlotData2DTriangulated
 RecipesBase.@recipe function f(pds::PlotDataSeries{<:PlotData2DTriangulated})
+    pd = pds.plot_data
+    @unpack variable_id = pds
+    @unpack x, y, data, t, variable_names = pd
+
+    # extract specific solution field to plot
+    data_field = zeros(eltype(first(data)), size(data))
+    for (i, data_i) in enumerate(data)
+        data_field[i] = data_i[variable_id]
+    end
-  pd = pds.plot_data
-  @unpack variable_id = pds
-  @unpack x, y, data, t, variable_names = pd
-
-  # extract specific solution field to plot
-  data_field = zeros(eltype(first(data)), size(data))
-  for (i, data_i) in enumerate(data)
-    data_field[i] = data_i[variable_id]
-  end
-
-  legend --> false
-  aspect_ratio --> 1
-  title --> pd.variable_names[variable_id]
-  xlims --> extrema(x)
-  ylims --> extrema(y)
-  xguide --> _get_guide(1)
-  yguide --> _get_guide(2)
-  seriestype --> :heatmap
-  colorbar --> :true
-
-  return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data_field, t)...)
+    legend --> false
+    aspect_ratio --> 1
+    title --> pd.variable_names[variable_id]
+    xlims --> extrema(x)
+    ylims --> extrema(y)
+    xguide --> _get_guide(1)
+    yguide --> _get_guide(2)
+    seriestype --> :heatmap
+    colorbar --> :true
+
+    return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data_field,
+                                                                  t)...)
 end
 
 # Visualize a 2D mesh given a `PlotData2DTriangulated` object
 RecipesBase.@recipe function f(pm::PlotMesh{<:PlotData2DTriangulated})
-  pd = pm.plot_data
-  @unpack x_face, y_face = pd
-
-  # This line separates solution lines on each edge by NaNs to ensure that they are rendered
-  # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix
-  # whose columns correspond to different elements. We add NaN separators by appending a row of
-  # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up
-  # plotting.
-  x_face, y_face = map(x->vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face))
-
-  xlims --> extrema(x_face)
-  ylims --> extrema(y_face)
-  aspect_ratio --> :equal
-  legend --> :none
-
-  # Set series properties
-  seriestype --> :path
-  linecolor --> :grey
-  linewidth --> 1
-
-  return x_face, y_face
+    pd = pm.plot_data
+    @unpack x_face, y_face = pd
+
+    # This line separates solution lines on each edge by NaNs to ensure that they are rendered
+    # separately. The coordinates `x_face`, `y_face` and the solution `sol_f` are assumed to be matrices
+    # whose columns correspond to different elements. We add NaN separators by appending a row of
+    # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up
+    # plotting.
+    x_face, y_face = map(x -> vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face))
+
+    xlims --> extrema(x_face)
+    ylims --> extrema(y_face)
+    aspect_ratio --> :equal
+    legend --> :none
+
+    # Set series properties
+    seriestype --> :path
+    linecolor --> :grey
+    linewidth --> 1
+
+    return x_face, y_face
 end
 
 # Visualizes a single scalar field. Intended for use with ScalarPlotData2D.
 # Example usage: `plot(ScalarPlotData2D(u, semi))`.
 RecipesBase.@recipe function f(pd::PlotData2DTriangulated{<:ScalarData})
-
-  @unpack x, y, data, t, variable_names = pd
-
-  title_string = isnothing(variable_names) ?
"" : variable_names - - legend --> false - aspect_ratio --> 1 - title --> title_string - xlims --> extrema(x) - ylims --> extrema(y) - xguide --> _get_guide(1) - yguide --> _get_guide(2) - seriestype --> :heatmap - colorbar --> :true - - # Since `data` is simply a ScalarData wrapper around the actual plot data, we pass in - # `data.data` instead. - return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data.data, t)...) + @unpack x, y, data, t, variable_names = pd + + title_string = isnothing(variable_names) ? "" : variable_names + + legend --> false + aspect_ratio --> 1 + title --> title_string + xlims --> extrema(x) + ylims --> extrema(y) + xguide --> _get_guide(1) + yguide --> _get_guide(2) + seriestype --> :heatmap + colorbar --> :true + + # Since `data` is simply a ScalarData wrapper around the actual plot data, we pass in + # `data.data` instead. + return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data.data, + t)...) end -RecipesBase.@recipe function f(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, point_id::Integer) - return cb.affect!, point_id +RecipesBase.@recipe function f(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, + point_id::Integer) + return cb.affect!, point_id end -RecipesBase.@recipe function f(time_series_callback::TimeSeriesCallback, point_id::Integer) - return PlotData1D(time_series_callback, point_id) +RecipesBase.@recipe function f(time_series_callback::TimeSeriesCallback, + point_id::Integer) + return PlotData1D(time_series_callback, point_id) end - - end # @muladd diff --git a/src/visualization/types.jl b/src/visualization/types.jl index a83b5bc92c6..b294ce25607 100644 --- a/src/visualization/types.jl +++ b/src/visualization/types.jl @@ -4,16 +4,18 @@ # TimeIntegratorSolution. # # Note: This is an experimental feature and may be changed in future releases without notice. +#! format: off const TrixiODESolution = Union{ODESolution{T, N, uType, uType2, DType, tType, rateType, P} where {T, N, uType, uType2, DType, tType, rateType, P<:ODEProblem{uType_, tType_, isinplace, P_, F_} where {uType_, tType_, isinplace, P_<:AbstractSemidiscretization, F_}}, TimeIntegratorSolution} +#! format: on # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). # Since these FMAs can increase the performance of many numerical algorithms, # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This file holds plotting types which can be used for both Plots.jl and Makie.jl. @@ -28,12 +30,12 @@ Base.length(pd::AbstractPlotData) = length(pd.variable_names) Base.size(pd::AbstractPlotData) = (length(pd),) Base.keys(pd::AbstractPlotData) = tuple(pd.variable_names...) -function Base.iterate(pd::AbstractPlotData, state=1) - if state > length(pd) - return nothing - else - return (pd.variable_names[state] => pd[pd.variable_names[state]], state + 1) - end +function Base.iterate(pd::AbstractPlotData, state = 1) + if state > length(pd) + return nothing + else + return (pd.variable_names[state] => pd[pd.variable_names[state]], state + 1) + end end """ @@ -45,19 +47,17 @@ Extract a single variable `variable_name` from `pd` for plotting with `Plots.plo This is an experimental feature and may change in future releases. 
""" function Base.getindex(pd::AbstractPlotData, variable_name) - variable_id = findfirst(isequal(variable_name), pd.variable_names) + variable_id = findfirst(isequal(variable_name), pd.variable_names) - if isnothing(variable_id) - throw(KeyError(variable_name)) - end + if isnothing(variable_id) + throw(KeyError(variable_name)) + end - return PlotDataSeries(pd, variable_id) + return PlotDataSeries(pd, variable_id) end Base.eltype(pd::AbstractPlotData) = Pair{String, PlotDataSeries{typeof(pd)}} - - """ PlotData2D @@ -67,53 +67,55 @@ mesh. !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -struct PlotData2DCartesian{Coordinates, Data, VariableNames, Vertices} <: AbstractPlotData{2} - x::Coordinates - y::Coordinates - data::Data - variable_names::VariableNames - mesh_vertices_x::Vertices - mesh_vertices_y::Vertices - orientation_x::Int - orientation_y::Int +struct PlotData2DCartesian{Coordinates, Data, VariableNames, Vertices} <: + AbstractPlotData{2} + x::Coordinates + y::Coordinates + data::Data + variable_names::VariableNames + mesh_vertices_x::Vertices + mesh_vertices_y::Vertices + orientation_x::Int + orientation_y::Int end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pd::PlotData2DCartesian) - @nospecialize pd # reduce precompilation time - - print(io, "PlotData2DCartesian{", - typeof(pd.x), ",", - typeof(pd.data), ",", - typeof(pd.variable_names), ",", - typeof(pd.mesh_vertices_x), - "}(, , , , , )") + @nospecialize pd # reduce precompilation time + + print(io, "PlotData2DCartesian{", + typeof(pd.x), ",", + typeof(pd.data), ",", + typeof(pd.variable_names), ",", + typeof(pd.mesh_vertices_x), + "}(, , , , , )") end - # holds plotting information for UnstructuredMesh2D and DGMulti-compatible meshes -struct PlotData2DTriangulated{DataType, NodeType, FaceNodeType, FaceDataType, VariableNames, PlottingTriangulation} <: AbstractPlotData{2} - x::NodeType # physical nodal coordinates, size (num_plotting_nodes x num_elements) - y::NodeType - data::DataType - t::PlottingTriangulation - x_face::FaceNodeType - y_face::FaceNodeType - face_data::FaceDataType - variable_names::VariableNames +struct PlotData2DTriangulated{DataType, NodeType, FaceNodeType, FaceDataType, + VariableNames, PlottingTriangulation} <: + AbstractPlotData{2} + x::NodeType # physical nodal coordinates, size (num_plotting_nodes x num_elements) + y::NodeType + data::DataType + t::PlottingTriangulation + x_face::FaceNodeType + y_face::FaceNodeType + face_data::FaceDataType + variable_names::VariableNames end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pd::PlotData2DTriangulated) - @nospecialize pd # reduce precompilation time - - print(io, "PlotData2DTriangulated{", - typeof(pd.x), ", ", - typeof(pd.data), ", ", - typeof(pd.x_face), ", ", - typeof(pd.face_data), ", ", - typeof(pd.variable_names), - "}(, , , , , , , )") + @nospecialize pd # reduce precompilation time + + print(io, "PlotData2DTriangulated{", + typeof(pd.x), ", ", + typeof(pd.data), ", ", + typeof(pd.x_face), ", ", + typeof(pd.face_data), ", ", + typeof(pd.variable_names), + "}(, , , , , , , )") end """ @@ -126,49 +128,49 @@ mesh. This is an experimental feature and may change in future releases. 
""" struct PlotData1D{Coordinates, Data, VariableNames, Vertices} <: AbstractPlotData{1} - x::Coordinates - data::Data - variable_names::VariableNames - mesh_vertices_x::Vertices - orientation_x::Integer + x::Coordinates + data::Data + variable_names::VariableNames + mesh_vertices_x::Vertices + orientation_x::Integer end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pd::PlotData1D) - print(io, "PlotData1D{", - typeof(pd.x), ",", - typeof(pd.data), ",", - typeof(pd.variable_names), ",", - typeof(pd.mesh_vertices_x), - "}(, , , )") + print(io, "PlotData1D{", + typeof(pd.x), ",", + typeof(pd.data), ",", + typeof(pd.variable_names), ",", + typeof(pd.mesh_vertices_x), + "}(, , , )") end # Auxiliary data structure for visualizing a single variable # # Note: This is an experimental feature and may be changed in future releases without notice. -struct PlotDataSeries{PD<:AbstractPlotData} - plot_data::PD - variable_id::Int +struct PlotDataSeries{PD <: AbstractPlotData} + plot_data::PD + variable_id::Int end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pds::PlotDataSeries) - @nospecialize pds # reduce precompilation time + @nospecialize pds # reduce precompilation time - print(io, "PlotDataSeries{", typeof(pds.plot_data), "}(, ", - pds.variable_id, ")") + print(io, "PlotDataSeries{", typeof(pds.plot_data), "}(, ", + pds.variable_id, ")") end # Generic PlotMesh wrapper type. -struct PlotMesh{PD<:AbstractPlotData} - plot_data::PD +struct PlotMesh{PD <: AbstractPlotData} + plot_data::PD end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pm::PlotMesh) - @nospecialize pm # reduce precompilation time + @nospecialize pm # reduce precompilation time - print(io, "PlotMesh{", typeof(pm.plot_data), "}()") + print(io, "PlotMesh{", typeof(pm.plot_data), "}()") end """ @@ -181,7 +183,6 @@ Extract grid lines from `pd` for plotting with `Plots.plot`. """ getmesh(pd::AbstractPlotData) = PlotMesh(pd) - """ PlotData2D(u, semi [or mesh, equations, solver, cache]; solution_variables=nothing, @@ -226,52 +227,67 @@ julia> plot(pd["scalar"]) # To plot only a single variable julia> plot!(getmesh(pd)) # To add grid lines to the plot ``` """ -PlotData2D(u_ode, semi; kwargs...) = PlotData2D(wrap_array_native(u_ode, semi), - mesh_equations_solver_cache(semi)...; - kwargs...) +function PlotData2D(u_ode, semi; kwargs...) + PlotData2D(wrap_array_native(u_ode, semi), + mesh_equations_solver_cache(semi)...; + kwargs...) +end # Redirect `PlotDataTriangulated2D` constructor. -PlotData2DTriangulated(u_ode, semi; kwargs...) = PlotData2DTriangulated(wrap_array_native(u_ode, semi), - mesh_equations_solver_cache(semi)...; - kwargs...) +function PlotData2DTriangulated(u_ode, semi; kwargs...) + PlotData2DTriangulated(wrap_array_native(u_ode, semi), + mesh_equations_solver_cache(semi)...; + kwargs...) +end # Create a PlotData2DCartesian object for TreeMeshes on default. -PlotData2D(u, mesh::TreeMesh, equations, solver, cache; kwargs...) = PlotData2DCartesian(u, mesh::TreeMesh, equations, solver, cache; kwargs...) +function PlotData2D(u, mesh::TreeMesh, equations, solver, cache; kwargs...) + PlotData2DCartesian(u, mesh::TreeMesh, equations, solver, cache; kwargs...) +end # Create a PlotData2DTriangulated object for any type of mesh other than the TreeMesh. -PlotData2D(u, mesh, equations, solver, cache; kwargs...) 
= PlotData2DTriangulated(u, mesh, equations, solver, cache; kwargs...)
+function PlotData2D(u, mesh, equations, solver, cache; kwargs...)
+    PlotData2DTriangulated(u, mesh, equations, solver, cache; kwargs...)
+end
 
 # Create a PlotData2DCartesian for a TreeMesh.
 function PlotData2DCartesian(u, mesh::TreeMesh, equations, solver, cache;
-                             solution_variables=nothing,
-                             grid_lines=true, max_supported_level=11, nvisnodes=nothing,
-                             slice=:xy, point=(0.0, 0.0, 0.0))
-  @assert ndims(mesh) in (2, 3) "unsupported number of dimensions $ndims (must be 2 or 3)"
-  solution_variables_ = digest_solution_variables(equations, solution_variables)
-
-  # Extract mesh info
-  center_level_0 = mesh.tree.center_level_0
-  length_level_0 = mesh.tree.length_level_0
-  leaf_cell_ids = leaf_cells(mesh.tree)
-  coordinates = mesh.tree.coordinates[:, leaf_cell_ids]
-  levels = mesh.tree.levels[leaf_cell_ids]
-
-  unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, solver, cache)
-  x, y, data, mesh_vertices_x, mesh_vertices_y = get_data_2d(center_level_0, length_level_0,
-                                                             leaf_cell_ids, coordinates, levels,
-                                                             ndims(mesh), unstructured_data,
-                                                             nnodes(solver), grid_lines,
-                                                             max_supported_level, nvisnodes,
-                                                             slice, point)
-  variable_names = SVector(varnames(solution_variables_, equations))
-
-  orientation_x, orientation_y = _get_orientations(mesh, slice)
-
-  return PlotData2DCartesian(x, y, data, variable_names, mesh_vertices_x, mesh_vertices_y,
-                             orientation_x, orientation_y)
+                             solution_variables = nothing,
+                             grid_lines = true, max_supported_level = 11,
+                             nvisnodes = nothing,
+                             slice = :xy, point = (0.0, 0.0, 0.0))
+    @assert ndims(mesh) in (2, 3) "unsupported number of dimensions $(ndims(mesh)) (must be 2 or 3)"
+    solution_variables_ = digest_solution_variables(equations, solution_variables)
+
+    # Extract mesh info
+    center_level_0 = mesh.tree.center_level_0
+    length_level_0 = mesh.tree.length_level_0
+    leaf_cell_ids = leaf_cells(mesh.tree)
+    coordinates = mesh.tree.coordinates[:, leaf_cell_ids]
+    levels = mesh.tree.levels[leaf_cell_ids]
+
+    unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations,
+                                              solver, cache)
+    x, y, data, mesh_vertices_x, mesh_vertices_y = get_data_2d(center_level_0,
+                                                               length_level_0,
+                                                               leaf_cell_ids,
+                                                               coordinates, levels,
+                                                               ndims(mesh),
+                                                               unstructured_data,
+                                                               nnodes(solver),
+                                                               grid_lines,
+                                                               max_supported_level,
+                                                               nvisnodes,
+                                                               slice, point)
+    variable_names = SVector(varnames(solution_variables_, equations))
+
+    orientation_x, orientation_y = _get_orientations(mesh, slice)
+
+    return PlotData2DCartesian(x, y, data, variable_names, mesh_vertices_x,
+                               mesh_vertices_y,
+                               orientation_x, orientation_y)
 end
-
 """
     PlotData2D(sol; kwargs...)
@@ -282,128 +298,143 @@ returns a `SciMLBase.ODESolution`) or Trixi.jl's own `solve!` (which returns a
 
 !!! warning "Experimental implementation"
     This is an experimental feature and may change in future releases.
 """
-PlotData2D(sol::TrixiODESolution; kwargs...) = PlotData2D(sol.u[end], sol.prob.p; kwargs...)
+function PlotData2D(sol::TrixiODESolution; kwargs...)
+    PlotData2D(sol.u[end], sol.prob.p; kwargs...)
+end
 
 # Also redirect when using PlotData2DTriangulated.
-PlotData2DTriangulated(sol::TrixiODESolution; kwargs...) = PlotData2DTriangulated(sol.u[end], sol.prob.p; kwargs...)
-
+function PlotData2DTriangulated(sol::TrixiODESolution; kwargs...)
+    PlotData2DTriangulated(sol.u[end], sol.prob.p; kwargs...)
+end
 
 # If `u` is an `Array{<:SVector}` and not a `StructArray`, convert it to a `StructArray` first.
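For reference, the conversion described in the comment above and performed by the constructor that follows can be sketched on its own. A `StructArray` of `SVector`s stores one plain array per solution variable, which is the layout the component-wise plotting code needs (the two-node, two-variable data here is made up):

using StaticArrays, StructArrays

u = [SVector(1.0, 2.0), SVector(3.0, 4.0)]  # Array of SVectors
u_structarray = StructArray{eltype(u)}(ntuple(_ -> zeros(2), 2))
for (i, u_i) in enumerate(u)
    u_structarray[i] = u_i
end
StructArrays.component(u_structarray, 1)  # [1.0, 3.0]: first variable, stored contiguously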
function PlotData2D(u::Array{<:SVector, 2}, mesh, equations, dg::DGMulti, cache; - solution_variables=nothing, nvisnodes=2*nnodes(dg)) - nvars = length(first(u)) - u_structarray = StructArray{eltype(u)}(ntuple(_->zeros(eltype(first(u)), size(u)), nvars)) - for (i, u_i) in enumerate(u) - u_structarray[i] = u_i - end + solution_variables = nothing, nvisnodes = 2 * nnodes(dg)) + nvars = length(first(u)) + u_structarray = StructArray{eltype(u)}(ntuple(_ -> zeros(eltype(first(u)), size(u)), + nvars)) + for (i, u_i) in enumerate(u) + u_structarray[i] = u_i + end - # re-dispatch to PlotData2D with mesh, equations, dg, cache arguments - return PlotData2D(u_structarray, mesh, equations, dg, cache; - solution_variables=solution_variables, nvisnodes=nvisnodes) + # re-dispatch to PlotData2D with mesh, equations, dg, cache arguments + return PlotData2D(u_structarray, mesh, equations, dg, cache; + solution_variables = solution_variables, nvisnodes = nvisnodes) end # constructor which returns an `PlotData2DTriangulated` object. function PlotData2D(u::StructArray, mesh, equations, dg::DGMulti, cache; - solution_variables=nothing, nvisnodes=2*nnodes(dg)) + solution_variables = nothing, nvisnodes = 2 * nnodes(dg)) + rd = dg.basis + md = mesh.md - rd = dg.basis - md = mesh.md + # Vp = the interpolation matrix from nodal points to plotting points + @unpack Vp = rd + interpolate_to_plotting_points!(out, x) = mul!(out, Vp, x) - # Vp = the interpolation matrix from nodal points to plotting points - @unpack Vp = rd - interpolate_to_plotting_points!(out, x) = mul!(out, Vp, x) + solution_variables_ = digest_solution_variables(equations, solution_variables) + variable_names = SVector(varnames(solution_variables_, equations)) - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) - - if Vp isa UniformScaling - num_plotting_points = size(u, 1) - else - num_plotting_points = size(Vp, 1) - end - nvars = nvariables(equations) - uEltype = eltype(first(u)) - u_plot = StructArray{SVector{nvars, uEltype}}(ntuple(_->zeros(uEltype, num_plotting_points, md.num_elements), nvars)) - - for e in eachelement(mesh, dg, cache) - # interpolate solution to plotting nodes element-by-element - StructArrays.foreachfield(interpolate_to_plotting_points!, view(u_plot, :, e), view(u, :, e)) - - # transform nodal values of the solution according to `solution_variables` - transform_to_solution_variables!(view(u_plot, :, e), solution_variables_, equations) - end + if Vp isa UniformScaling + num_plotting_points = size(u, 1) + else + num_plotting_points = size(Vp, 1) + end + nvars = nvariables(equations) + uEltype = eltype(first(u)) + u_plot = StructArray{SVector{nvars, uEltype}}(ntuple(_ -> zeros(uEltype, + num_plotting_points, + md.num_elements), + nvars)) + + for e in eachelement(mesh, dg, cache) + # interpolate solution to plotting nodes element-by-element + StructArrays.foreachfield(interpolate_to_plotting_points!, view(u_plot, :, e), + view(u, :, e)) + + # transform nodal values of the solution according to `solution_variables` + transform_to_solution_variables!(view(u_plot, :, e), solution_variables_, + equations) + end - # interpolate nodal coordinates to plotting points - x_plot, y_plot = map(x->Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates + # interpolate nodal coordinates to plotting points + x_plot, y_plot = map(x -> Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates - # construct a 
triangulation of the reference plotting nodes - t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points + # construct a triangulation of the reference plotting nodes + t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points - x_face, y_face, face_data = mesh_plotting_wireframe(u, mesh, equations, dg, cache; - nvisnodes=nvisnodes) + x_face, y_face, face_data = mesh_plotting_wireframe(u, mesh, equations, dg, cache; + nvisnodes = nvisnodes) - return PlotData2DTriangulated(x_plot, y_plot, u_plot, t, x_face, y_face, face_data, variable_names) + return PlotData2DTriangulated(x_plot, y_plot, u_plot, t, x_face, y_face, face_data, + variable_names) end # specializes the PlotData2D constructor to return an PlotData2DTriangulated for any type of mesh. function PlotData2DTriangulated(u, mesh, equations, dg::DGSEM, cache; - solution_variables=nothing, nvisnodes=2*polydeg(dg)) - - @assert ndims(mesh) == 2 "Input must be two-dimensional." - - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) - - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) - - # reference plotting nodes - if nvisnodes == 0 || nvisnodes === nothing - nvisnodes = polydeg(dg) + 1 - end - plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # create triangulation for plotting nodes - r_plot, s_plot = (x->plotting_interp_matrix*x).((r, s)) # interpolate dg nodes to plotting nodes - - # construct a triangulation of the plotting nodes - t = reference_plotting_triangulation((r_plot, s_plot)) - - # extract x,y coordinates and solutions on each element - uEltype = eltype(u) - nvars = nvariables(equations) - x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, n_elements) - y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, n_elements) - u_extracted = StructArray{SVector{nvars, uEltype}}(ntuple(_->similar(x, (n_nodes_2d, n_elements)), nvars)) - for element in eachelement(dg, cache) - sk = 1 - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - u_extracted[sk, element] = u_node - sk += 1 + solution_variables = nothing, + nvisnodes = 2 * polydeg(dg)) + @assert ndims(mesh)==2 "Input must be two-dimensional." 
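Throughout these constructors, a single matrix multiplication upsamples nodal data to plotting points: `Vp` (for `DGMulti`) and `plotting_interp_matrix` (for `DGSEM`) map each element's column of nodal values to a finer column of plotting values. A hand-built linear example, not Trixi.jl's actual operator:

# Interpolate linear data from the endpoints of [-1, 1] to {-1, 0, 1}.
Vp = [1.0 0.0; 0.5 0.5; 0.0 1.0]
nodal_values = [0.0 2.0; 1.0 3.0]  # rows = nodes, columns = elements
Vp * nodal_values                  # [0.0 2.0; 0.5 2.5; 1.0 3.0]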
+ + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) + + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) + + # reference plotting nodes + if nvisnodes == 0 || nvisnodes === nothing + nvisnodes = polydeg(dg) + 1 + end + plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes = nvisnodes) + + # create triangulation for plotting nodes + r_plot, s_plot = (x -> plotting_interp_matrix * x).((r, s)) # interpolate dg nodes to plotting nodes + + # construct a triangulation of the plotting nodes + t = reference_plotting_triangulation((r_plot, s_plot)) + + # extract x,y coordinates and solutions on each element + uEltype = eltype(u) + nvars = nvariables(equations) + x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, + n_elements) + y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, + n_elements) + u_extracted = StructArray{SVector{nvars, uEltype}}(ntuple(_ -> similar(x, + (n_nodes_2d, + n_elements)), + nvars)) + for element in eachelement(dg, cache) + sk = 1 + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + u_extracted[sk, element] = u_node + sk += 1 + end end - end - # interpolate to volume plotting points - xplot, yplot = plotting_interp_matrix*x, plotting_interp_matrix*y - uplot = StructArray{SVector{nvars, uEltype}}(map(x->plotting_interp_matrix*x, - StructArrays.components(u_extracted))) + # interpolate to volume plotting points + xplot, yplot = plotting_interp_matrix * x, plotting_interp_matrix * y + uplot = StructArray{SVector{nvars, uEltype}}(map(x -> plotting_interp_matrix * x, + StructArrays.components(u_extracted))) - xfp, yfp, ufp = mesh_plotting_wireframe(u_extracted, mesh, equations, dg, cache; nvisnodes=nvisnodes) + xfp, yfp, ufp = mesh_plotting_wireframe(u_extracted, mesh, equations, dg, cache; + nvisnodes = nvisnodes) - # convert variables based on solution_variables mapping - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) + # convert variables based on solution_variables mapping + solution_variables_ = digest_solution_variables(equations, solution_variables) + variable_names = SVector(varnames(solution_variables_, equations)) - transform_to_solution_variables!(uplot, solution_variables_, equations) - transform_to_solution_variables!(ufp, solution_variables_, equations) + transform_to_solution_variables!(uplot, solution_variables_, equations) + transform_to_solution_variables!(ufp, solution_variables_, equations) - return PlotData2DTriangulated(xplot, yplot, uplot, t, xfp, yfp, ufp, variable_names) + return PlotData2DTriangulated(xplot, yplot, uplot, t, xfp, yfp, ufp, variable_names) end # Wrapper struct to indicate that an array represents a scalar data field. Used only for dispatch. struct ScalarData{T} - data::T + data::T end """ @@ -412,77 +443,77 @@ end Returns an `PlotData2DTriangulated` object which is used to visualize a single scalar field. `u` should be an array whose entries correspond to values of the scalar field at nodal points. """ -ScalarPlotData2D(u, semi::AbstractSemidiscretization; kwargs...) = - ScalarPlotData2D(u, mesh_equations_solver_cache(semi)...; kwargs...) +function ScalarPlotData2D(u, semi::AbstractSemidiscretization; kwargs...) + ScalarPlotData2D(u, mesh_equations_solver_cache(semi)...; kwargs...) 
+end # Returns an `PlotData2DTriangulated` which is used to visualize a single scalar field function ScalarPlotData2D(u, mesh, equations, dg::DGMulti, cache; - variable_name=nothing, nvisnodes=2*nnodes(dg)) + variable_name = nothing, nvisnodes = 2 * nnodes(dg)) + rd = dg.basis + md = mesh.md - rd = dg.basis - md = mesh.md + # Vp = the interpolation matrix from nodal points to plotting points + @unpack Vp = rd - # Vp = the interpolation matrix from nodal points to plotting points - @unpack Vp = rd + # interpolate nodal coordinates and solution field to plotting points + x_plot, y_plot = map(x -> Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates + u_plot = Vp * u - # interpolate nodal coordinates and solution field to plotting points - x_plot, y_plot = map(x->Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates - u_plot = Vp * u + # construct a triangulation of the reference plotting nodes + t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points - # construct a triangulation of the reference plotting nodes - t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points + # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using + # existing functionality based on `PlotData2D(sol)`. + x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, + dg, cache; + nvisnodes = 2 * nnodes(dg)) - # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using - # existing functionality based on `PlotData2D(sol)`. - x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, dg, cache; - nvisnodes=2*nnodes(dg)) - - # wrap solution in ScalarData struct for recipe dispatch - return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, - x_face, y_face, face_data, variable_name) + # wrap solution in ScalarData struct for recipe dispatch + return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, + x_face, y_face, face_data, variable_name) end -function ScalarPlotData2D(u, mesh, equations, dg::DGSEM, cache; variable_name=nothing, nvisnodes=2*nnodes(dg)) - - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) +function ScalarPlotData2D(u, mesh, equations, dg::DGSEM, cache; variable_name = nothing, + nvisnodes = 2 * nnodes(dg)) + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) - # reference plotting nodes - if nvisnodes == 0 || nvisnodes === nothing - nvisnodes = polydeg(dg) + 1 - end - plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # create triangulation for plotting nodes - r_plot, s_plot = (x->plotting_interp_matrix*x).((r, s)) # interpolate dg nodes to plotting nodes + # reference plotting nodes + if nvisnodes == 0 || nvisnodes === nothing + nvisnodes = polydeg(dg) + 1 + end + plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes = nvisnodes) - # construct a triangulation of the plotting nodes - t = reference_plotting_triangulation((r_plot, s_plot)) + # create triangulation for plotting nodes + r_plot, s_plot = (x -> plotting_interp_matrix * x).((r, s)) # interpolate dg nodes to plotting nodes - # extract x,y coordinates and reshape them 
into matrices of size (n_nodes_2d, n_elements) - x = view(cache.elements.node_coordinates, 1, :, :, :) - y = view(cache.elements.node_coordinates, 2, :, :, :) - x, y = reshape.((x, y), n_nodes_2d, n_elements) + # construct a triangulation of the plotting nodes + t = reference_plotting_triangulation((r_plot, s_plot)) - # interpolate to volume plotting points by multiplying each column by `plotting_interp_matrix` - x_plot, y_plot = plotting_interp_matrix * x, plotting_interp_matrix * y - u_plot = plotting_interp_matrix * reshape(u, size(x)) + # extract x,y coordinates and reshape them into matrices of size (n_nodes_2d, n_elements) + x = view(cache.elements.node_coordinates, 1, :, :, :) + y = view(cache.elements.node_coordinates, 2, :, :, :) + x, y = reshape.((x, y), n_nodes_2d, n_elements) - # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using - # existing functionality based on `PlotData2D(sol)`. - x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, dg, cache; - nvisnodes=2*nnodes(dg)) + # interpolate to volume plotting points by multiplying each column by `plotting_interp_matrix` + x_plot, y_plot = plotting_interp_matrix * x, plotting_interp_matrix * y + u_plot = plotting_interp_matrix * reshape(u, size(x)) + # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using + # existing functionality based on `PlotData2D(sol)`. + x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, + dg, cache; + nvisnodes = 2 * nnodes(dg)) - # wrap solution in ScalarData struct for recipe dispatch - return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, - x_face, y_face, face_data, variable_name) + # wrap solution in ScalarData struct for recipe dispatch + return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, + x_face, y_face, face_data, variable_name) end - """ PlotData1D(u, semi [or mesh, equations, solver, cache]; solution_variables=nothing, nvisnodes=nothing) @@ -510,110 +541,149 @@ which define the curve. When using `curve` any other input from `slice` or `poin !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -PlotData1D(u_ode, semi; kwargs...) = PlotData1D(wrap_array_native(u_ode, semi), - mesh_equations_solver_cache(semi)...; - kwargs...) +function PlotData1D(u_ode, semi; kwargs...) + PlotData1D(wrap_array_native(u_ode, semi), + mesh_equations_solver_cache(semi)...; + kwargs...) +end function PlotData1D(u, mesh::TreeMesh, equations, solver, cache; - solution_variables=nothing, nvisnodes=nothing, - slice=:x, point=(0.0, 0.0, 0.0), curve=nothing) - - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) - - original_nodes = cache.elements.node_coordinates - unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, solver, cache) - - orientation_x = 0 # Set 'orientation' to zero on default. 
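A usage sketch for the `slice` and `point` keywords documented in the `PlotData1D` docstring above; `sol` stands for a 2D `TrixiODESolution` and "rho" for one of its variable names, both placeholders:

using Plots

# 1D cut through a 2D solution, along the x-direction through y = 0.5:
pd = PlotData1D(sol; slice = :x, point = (0.0, 0.5))
plot(pd["rho"])     # plot a single variable along the cut
plot!(getmesh(pd))  # overlay mesh vertices as vertical lines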
-
-  if ndims(mesh) == 1
-    x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data, nvisnodes)
-    orientation_x = 1
-  elseif ndims(mesh) == 2
-    if curve !== nothing
-      x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache)
-    else
-      x, data, mesh_vertices_x = unstructured_2d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, point)
+                    solution_variables = nothing, nvisnodes = nothing,
+                    slice = :x, point = (0.0, 0.0, 0.0), curve = nothing)
+    solution_variables_ = digest_solution_variables(equations, solution_variables)
+    variable_names = SVector(varnames(solution_variables_, equations))
+
+    original_nodes = cache.elements.node_coordinates
+    unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations,
+                                              solver, cache)
+
+    orientation_x = 0 # Set 'orientation' to zero by default.
+
+    if ndims(mesh) == 1
+        x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data,
+                                               nvisnodes)
+        orientation_x = 1
+
+        # Special care is required for first-order FV approximations since the nodes are the
+        # cell centers and do not contain the boundaries
+        n_nodes = size(unstructured_data, 1)
+        if n_nodes == 1
+            n_visnodes = length(x) ÷ nelements(solver, cache)
+            if n_visnodes != 2
+                throw(ArgumentError("This number of visualization nodes is currently not supported for finite volume approximations."))
+            end
+            left_boundary = mesh.tree.center_level_0[1] - mesh.tree.length_level_0 / 2
+            dx_2 = zero(left_boundary)
+            for i in 1:div(length(x), 2)
+                # Adjust plot nodes so that they are at the boundaries of each element
+                dx_2 = x[2 * i - 1] - left_boundary
+                x[2 * i - 1] -= dx_2
+                x[2 * i] += dx_2
+                left_boundary = left_boundary + 2 * dx_2
+
+                # Adjust mesh plot nodes
+                mesh_vertices_x[i] -= dx_2
+            end
+            mesh_vertices_x[end] += dx_2
+        end
+    elseif ndims(mesh) == 2
+        if curve !== nothing
+            x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(original_nodes,
+                                                                   unstructured_data,
+                                                                   nvisnodes, curve,
+                                                                   mesh, solver, cache)
+        else
+            x, data, mesh_vertices_x = unstructured_2d_to_1d(original_nodes,
+                                                             unstructured_data,
+                                                             nvisnodes, slice, point)
+        end
+    else # ndims(mesh) == 3
+        if curve !== nothing
+            x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes,
+                                                                   unstructured_data,
+                                                                   nvisnodes, curve,
+                                                                   mesh, solver, cache)
+        else
+            x, data, mesh_vertices_x = unstructured_3d_to_1d(original_nodes,
+                                                             unstructured_data,
+                                                             nvisnodes, slice, point)
+        end
    end
-  else # ndims(mesh) == 3
-    if curve !== nothing
-      x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache)
-    else
-      x, data, mesh_vertices_x = unstructured_3d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, point)
-    end
-  end

-  return PlotData1D(x, data, variable_names, mesh_vertices_x,
-                    orientation_x)
+    return PlotData1D(x, data, variable_names, mesh_vertices_x,
+                      orientation_x)
end

function PlotData1D(u, mesh, equations, solver, cache;
-                    solution_variables=nothing, nvisnodes=nothing,
-                    slice=:x, point=(0.0, 0.0, 0.0), curve=nothing)
-
-  solution_variables_ = digest_solution_variables(equations, solution_variables)
-  variable_names = SVector(varnames(solution_variables_, equations))
-
-  original_nodes = cache.elements.node_coordinates
-  unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, solver, cache)
-
-  orientation_x = 0 # Set 'orientation' to zero on default.
-
-  if ndims(mesh) == 1
-    x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data, nvisnodes)
-    orientation_x = 1
-  elseif ndims(mesh) == 2
-    # Create a 'PlotData2DTriangulated' object so a triangulation can be used when extracting relevant data.
-    pd = PlotData2DTriangulated(u, mesh, equations, solver, cache; solution_variables, nvisnodes)
-    x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(pd, curve, slice, point, nvisnodes)
-  else # ndims(mesh) == 3
-    # Extract the information required to create a PlotData1D object.
-    x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes, u, curve, slice, point, nvisnodes)
-  end
+                    solution_variables = nothing, nvisnodes = nothing,
+                    slice = :x, point = (0.0, 0.0, 0.0), curve = nothing)
+    solution_variables_ = digest_solution_variables(equations, solution_variables)
+    variable_names = SVector(varnames(solution_variables_, equations))
+
+    original_nodes = cache.elements.node_coordinates
+    unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations,
+                                              solver, cache)
+
+    orientation_x = 0 # Set 'orientation' to zero by default.
+
+    if ndims(mesh) == 1
+        x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data,
+                                               nvisnodes)
+        orientation_x = 1
+    elseif ndims(mesh) == 2
+        # Create a 'PlotData2DTriangulated' object so a triangulation can be used when extracting relevant data.
+        pd = PlotData2DTriangulated(u, mesh, equations, solver, cache;
+                                    solution_variables, nvisnodes)
+        x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(pd, curve, slice, point,
+                                                               nvisnodes)
+    else # ndims(mesh) == 3
+        # Extract the information required to create a PlotData1D object.
+        x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes, u, curve,
+                                                               slice, point, nvisnodes)
+    end

-  return PlotData1D(x, data, variable_names, mesh_vertices_x,
-                    orientation_x)
+    return PlotData1D(x, data, variable_names, mesh_vertices_x,
+                      orientation_x)
end

# Specializes the `PlotData1D` constructor for one-dimensional `DGMulti` solvers.
function PlotData1D(u, mesh, equations, dg::DGMulti{1}, cache;
-                    solution_variables=nothing)
-
-  solution_variables_ = digest_solution_variables(equations, solution_variables)
-  variable_names = SVector(varnames(solution_variables_, equations))
-
-  orientation_x = 0 # Set 'orientation' to zero on default.
-
-  if u isa StructArray
-    # Convert conserved variables to the given `solution_variables` and set up
-    # plotting coordinates
-    # This uses a "structure of arrays"
-    data = map(x -> vcat(dg.basis.Vp * x, fill(NaN, 1, size(u, 2))),
-               StructArrays.components(solution_variables_.(u, equations)))
-    x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2)))
-
-    # Here, we ensure that `DGMulti` visualization uses the same data layout and format
-    # as `TreeMesh`. This enables us to reuse existing plot recipes. In particular,
-    # `hcat(data...)` creates a matrix of size `num_plotting_points` by `nvariables(equations)`,
-    # with data on different elements separated by `NaNs`.
-    x_plot = vec(x)
-    data_plot = hcat(vec.(data)...)
-  else
-    # Convert conserved variables to the given `solution_variables` and set up
-    # plotting coordinates
-    # This uses an "array of structures"
-    data_tmp = dg.basis.Vp * solution_variables_.(u, equations)
-    data = vcat(data_tmp, fill(NaN * zero(eltype(data_tmp)), 1, size(u, 2)))
-    x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2)))
-
-    # Same as above - we create `data_plot` as array of size `num_plotting_points`
-    # by "number of plotting variables".
-    x_plot = vec(x)
-    data_plot = permutedims(reinterpret(reshape, eltype(eltype(data)), vec(data)),
-                            (2, 1))
-  end
-
-  return PlotData1D(x_plot, data_plot, variable_names, mesh.md.VX, orientation_x)
+                    solution_variables = nothing)
+    solution_variables_ = digest_solution_variables(equations, solution_variables)
+    variable_names = SVector(varnames(solution_variables_, equations))
+
+    orientation_x = 0 # Set 'orientation' to zero by default.
+
+    if u isa StructArray
+        # Convert conserved variables to the given `solution_variables` and set up
+        # plotting coordinates
+        # This uses a "structure of arrays"
+        data = map(x -> vcat(dg.basis.Vp * x, fill(NaN, 1, size(u, 2))),
+                   StructArrays.components(solution_variables_.(u, equations)))
+        x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2)))
+
+        # Here, we ensure that `DGMulti` visualization uses the same data layout and format
+        # as `TreeMesh`. This enables us to reuse existing plot recipes. In particular,
+        # `hcat(data...)` creates a matrix of size `num_plotting_points` by `nvariables(equations)`,
+        # with data on different elements separated by `NaNs`.
+        x_plot = vec(x)
+        data_plot = hcat(vec.(data)...)
+    else
+        # Convert conserved variables to the given `solution_variables` and set up
+        # plotting coordinates
+        # This uses an "array of structures"
+        data_tmp = dg.basis.Vp * solution_variables_.(u, equations)
+        data = vcat(data_tmp, fill(NaN * zero(eltype(data_tmp)), 1, size(u, 2)))
+        x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2)))
+
+        # Same as above - we create `data_plot` as an array of size `num_plotting_points`
+        # by "number of plotting variables".
+        x_plot = vec(x)
+        data_plot = permutedims(reinterpret(reshape, eltype(eltype(data)), vec(data)),
+                                (2, 1))
+    end
+
+    return PlotData1D(x_plot, data_plot, variable_names, mesh.md.VX, orientation_x)
end

"""
@@ -626,26 +696,27 @@ Create a `PlotData1D` object from a solution object created by either `OrdinaryD

!!! warning "Experimental implementation"
    This is an experimental feature and may change in future releases.
"""
-PlotData1D(sol::TrixiODESolution; kwargs...) = PlotData1D(sol.u[end], sol.prob.p; kwargs...)
+function PlotData1D(sol::TrixiODESolution; kwargs...)
+    PlotData1D(sol.u[end], sol.prob.p; kwargs...)
+end function PlotData1D(time_series_callback::TimeSeriesCallback, point_id::Integer) - @unpack time, variable_names, point_data = time_series_callback + @unpack time, variable_names, point_data = time_series_callback - n_solution_variables = length(variable_names) - data = Matrix{Float64}(undef, length(time), n_solution_variables) - reshaped = reshape(point_data[point_id], n_solution_variables, length(time)) - for v in 1:n_solution_variables - @views data[:, v] = reshaped[v, :] - end + n_solution_variables = length(variable_names) + data = Matrix{Float64}(undef, length(time), n_solution_variables) + reshaped = reshape(point_data[point_id], n_solution_variables, length(time)) + for v in 1:n_solution_variables + @views data[:, v] = reshaped[v, :] + end - mesh_vertices_x = Vector{Float64}(undef, 0) + mesh_vertices_x = Vector{Float64}(undef, 0) - return PlotData1D(time, data, SVector(variable_names), mesh_vertices_x, 0) + return PlotData1D(time, data, SVector(variable_names), mesh_vertices_x, 0) end -function PlotData1D(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, point_id::Integer) - return PlotData1D(cb.affect!, point_id) +function PlotData1D(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, + point_id::Integer) + return PlotData1D(cb.affect!, point_id) end - - end # @muladd diff --git a/src/visualization/utilities.jl b/src/visualization/utilities.jl index ba589073b92..05457395ac0 100644 --- a/src/visualization/utilities.jl +++ b/src/visualization/utilities.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent @inline num_faces(elem::Tri) = 3 @inline num_faces(elem::Quad) = 4 @@ -13,7 +14,7 @@ # using the [Shoelace_formula](https://en.wikipedia.org/wiki/Shoelace_formula). function compute_triangle_area(tri) A, B, C = tri - return 0.5 * (A[1] * (B[2] - C[2]) + B[1] * (C[2]-A[2]) + C[1] * (A[2] - B[2])) + return 0.5 * (A[1] * (B[2] - C[2]) + B[1] * (C[2] - A[2]) + C[1] * (A[2] - B[2])) end # reference_plotting_triangulation(reference_plotting_coordinates) @@ -26,32 +27,33 @@ end # triangulation of the plotting points, with zero-volume triangles removed. # # For example, r[t[1, i]] returns the first reference coordinate of the 1st point on the ith triangle. 
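#
# As an illustrative sketch (hypothetical values, not taken from the code base): for
# three reference points spanning a single triangle,
#     r = [-1.0, 1.0, -1.0]; s = [-1.0, -1.0, 1.0]
#     t = reference_plotting_triangulation((r, s))
# `t` would be a 1x3 integer matrix whose single row contains the indices of the
# triangle's vertices in `r` and `s`, in whatever orientation Triangulate.jl chooses.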
-function reference_plotting_triangulation(reference_plotting_coordinates, tol=50*eps()) - # on-the-fly triangulation of plotting nodes on the reference element - tri_in = Triangulate.TriangulateIO() - tri_in.pointlist = permutedims(hcat(reference_plotting_coordinates...)) - tri_out, _ = Triangulate.triangulate("Q", tri_in) - triangles = tri_out.trianglelist - - # filter out sliver triangles - has_volume = fill(true, size(triangles, 2)) - for i in axes(triangles, 2) - ids = @view triangles[:, i] - x_points = @view tri_out.pointlist[1, ids] - y_points = @view tri_out.pointlist[2, ids] - area = compute_triangle_area(zip(x_points, y_points)) - if abs(area) < tol - has_volume[i] = false - end - end - return permutedims(triangles[:, findall(has_volume)]) +function reference_plotting_triangulation(reference_plotting_coordinates, + tol = 50 * eps()) + # on-the-fly triangulation of plotting nodes on the reference element + tri_in = Triangulate.TriangulateIO() + tri_in.pointlist = permutedims(hcat(reference_plotting_coordinates...)) + tri_out, _ = Triangulate.triangulate("Q", tri_in) + triangles = tri_out.trianglelist + + # filter out sliver triangles + has_volume = fill(true, size(triangles, 2)) + for i in axes(triangles, 2) + ids = @view triangles[:, i] + x_points = @view tri_out.pointlist[1, ids] + y_points = @view tri_out.pointlist[2, ids] + area = compute_triangle_area(zip(x_points, y_points)) + if abs(area) < tol + has_volume[i] = false + end + end + return permutedims(triangles[:, findall(has_volume)]) end # This function is used to avoid type instabilities when calling `digest_solution_variables`. function transform_to_solution_variables!(u, solution_variables, equations) - for (i, u_i) in enumerate(u) - u[i] = solution_variables(u_i, equations) - end + for (i, u_i) in enumerate(u) + u[i] = solution_variables(u_i, equations) + end end # global_plotting_triangulation_triplot(u_plot, rst_plot, xyz_plot) @@ -64,174 +66,196 @@ end # - u_plot = matrix of size (Nplot, K) representing solution to plot. 
# - t = triangulation of reference plotting points function global_plotting_triangulation_triplot(xyz_plot, u_plot, t) - - @assert size(first(xyz_plot), 1) == size(u_plot, 1) "Row dimension of u_plot does not match row dimension of xyz_plot" - - # build discontinuous data on plotting triangular mesh - num_plotting_points, num_elements = size(u_plot) - num_reference_plotting_triangles = size(t, 1) - num_plotting_elements_total = num_reference_plotting_triangles * num_elements - - # each column of `tp` corresponds to a vertex of a plotting triangle - tp = zeros(Int32, 3, num_plotting_elements_total) - zp = similar(tp, eltype(u_plot)) - for e = 1:num_elements - for i = 1:num_reference_plotting_triangles - tp[:, i + (e-1)*num_reference_plotting_triangles] .= @views t[i, :] .+ (e-1) * num_plotting_points - zp[:, i + (e-1)*num_reference_plotting_triangles] .= @views u_plot[t[i, :], e] + @assert size(first(xyz_plot), 1)==size(u_plot, 1) "Row dimension of u_plot does not match row dimension of xyz_plot" + + # build discontinuous data on plotting triangular mesh + num_plotting_points, num_elements = size(u_plot) + num_reference_plotting_triangles = size(t, 1) + num_plotting_elements_total = num_reference_plotting_triangles * num_elements + + # each column of `tp` corresponds to a vertex of a plotting triangle + tp = zeros(Int32, 3, num_plotting_elements_total) + zp = similar(tp, eltype(u_plot)) + for e in 1:num_elements + for i in 1:num_reference_plotting_triangles + tp[:, i + (e - 1) * num_reference_plotting_triangles] .= @views t[i, :] .+ + (e - 1) * + num_plotting_points + zp[:, i + (e - 1) * num_reference_plotting_triangles] .= @views u_plot[t[i, + :], + e] + end end - end - return vec.(xyz_plot)..., zp, tp + return vec.(xyz_plot)..., zp, tp end -function get_face_node_indices(r, s, dg::DGSEM, tol=100*eps()) - face_1 = findall(@. abs(s+1) < tol) - face_2 = findall(@. abs(r-1) < tol) - face_3 = findall(@. abs(s-1) < tol) - face_4 = findall(@. abs(r+1) < tol) - Fmask = hcat(face_1, face_2, face_3, face_4) - return Fmask +function get_face_node_indices(r, s, dg::DGSEM, tol = 100 * eps()) + face_1 = findall(@. abs(s + 1) < tol) + face_2 = findall(@. abs(r - 1) < tol) + face_3 = findall(@. abs(s - 1) < tol) + face_4 = findall(@. abs(r + 1) < tol) + Fmask = hcat(face_1, face_2, face_3, face_4) + return Fmask end # dispatch on semi -mesh_plotting_wireframe(u, semi) = mesh_plotting_wireframe(u, mesh_equations_solver_cache(semi)...) +function mesh_plotting_wireframe(u, semi) + mesh_plotting_wireframe(u, mesh_equations_solver_cache(semi)...) +end # mesh_plotting_wireframe(u, mesh, equations, dg::DGMulti, cache; num_plotting_pts=25) # # Generates data for plotting a mesh wireframe given StartUpDG data types. # Returns (plotting_coordinates_x, plotting_coordinates_y, nothing) for a 2D mesh wireframe. 
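#
# A usage sketch (illustrative only, assuming a Makie-style `lines!` function is
# available): the returned coordinate arrays store one mesh face per column, so the
# wireframe can be drawn face by face, e.g.,
#     x_mesh, y_mesh, _ = mesh_plotting_wireframe(u, mesh, equations, dg, cache)
#     for f in axes(x_mesh, 2)
#         lines!(x_mesh[:, f], y_mesh[:, f])
#     end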
function mesh_plotting_wireframe(u::StructArray, mesh, equations, dg::DGMulti, cache; - nvisnodes=2*nnodes(dg)) - @unpack md = mesh - rd = dg.basis - - # Construct 1D plotting interpolation matrix `Vp1D` for a single face - @unpack N, Fmask = rd - num_face_points = length(Fmask) ÷ num_faces(rd.element_type) - vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, StartUpDG.nodes(Line(), num_face_points - 1)) - rplot = LinRange(-1, 1, nvisnodes) - Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D - - num_faces_total = num_faces(rd.element_type) * md.num_elements - xf, yf = map(x->reshape(view(x, Fmask, :), num_face_points, num_faces_total), md.xyz) - uf = similar(u, size(xf)) - apply_to_each_field((out, x)->out .= reshape(view(x, Fmask, :), num_face_points, num_faces_total), uf, u) - - num_face_plotting_points = size(Vp1D, 1) - x_mesh, y_mesh = ntuple(_->zeros(num_face_plotting_points, num_faces_total), 2) - u_mesh = similar(u, (num_face_plotting_points, num_faces_total)) - for f in 1:num_faces_total - mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) - mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) - apply_to_each_field(mul_by!(Vp1D), view(u_mesh, :, f), view(uf, :, f)) - end - - return x_mesh, y_mesh, u_mesh -end - -function mesh_plotting_wireframe(u::StructArray, mesh, equations, dg::DGSEM, cache; nvisnodes=2*nnodes(dg)) - - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) - - # extract node coordinates - uEltype = eltype(first(u)) - nvars = nvariables(equations) - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) - x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, n_elements) - y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, n_elements) - - # extract indices of local face nodes for wireframe plotting - Fmask = get_face_node_indices(r, s, dg) - plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. - # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size - # (Number of face plotting nodes) x (Number of faces). 
- function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) - num_reference_faces = 2 * ndims(mesh) - xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) - return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) - end - reshape_and_interpolate(x) = plotting_interp_matrix1D * face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) - xfp, yfp = map(reshape_and_interpolate, (x, y)) - ufp = StructArray{SVector{nvars, uEltype}}(map(reshape_and_interpolate, StructArrays.components(u))) - - return xfp, yfp, ufp -end + nvisnodes = 2 * nnodes(dg)) + @unpack md = mesh + rd = dg.basis + + # Construct 1D plotting interpolation matrix `Vp1D` for a single face + @unpack N, Fmask = rd + num_face_points = length(Fmask) ÷ num_faces(rd.element_type) + vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, + StartUpDG.nodes(Line(), + num_face_points - 1)) + rplot = LinRange(-1, 1, nvisnodes) + Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D + + num_faces_total = num_faces(rd.element_type) * md.num_elements + xf, yf = map(x -> reshape(view(x, Fmask, :), num_face_points, num_faces_total), + md.xyz) + uf = similar(u, size(xf)) + apply_to_each_field((out, x) -> out .= reshape(view(x, Fmask, :), num_face_points, + num_faces_total), uf, u) + + num_face_plotting_points = size(Vp1D, 1) + x_mesh, y_mesh = ntuple(_ -> zeros(num_face_plotting_points, num_faces_total), 2) + u_mesh = similar(u, (num_face_plotting_points, num_faces_total)) + for f in 1:num_faces_total + mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) + mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) + apply_to_each_field(mul_by!(Vp1D), view(u_mesh, :, f), view(uf, :, f)) + end -function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGSEM, cache; nvisnodes=2*nnodes(dg)) - - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) - - # extract node coordinates - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) - x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, n_elements) - y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, n_elements) - - # extract indices of local face nodes for wireframe plotting - Fmask = get_face_node_indices(r, s, dg) - plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. - # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size - # (Number of face plotting nodes) x (Number of faces). 
- function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) - num_reference_faces = 2 * ndims(mesh) - xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) - return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) - end - reshape_and_interpolate(x) = plotting_interp_matrix1D * face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) - xfp, yfp, ufp = map(reshape_and_interpolate, (x, y, u.data)) - - return xfp, yfp, ufp + return x_mesh, y_mesh, u_mesh end -function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGMulti, cache; nvisnodes=2*nnodes(dg)) - - @unpack md = mesh - rd = dg.basis - - # Construct 1D plotting interpolation matrix `Vp1D` for a single face - @unpack N, Fmask = rd - vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, StartUpDG.nodes(Line(), N)) - rplot = LinRange(-1, 1, nvisnodes) - Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D +function mesh_plotting_wireframe(u::StructArray, mesh, equations, dg::DGSEM, cache; + nvisnodes = 2 * nnodes(dg)) + + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) + + # extract node coordinates + uEltype = eltype(first(u)) + nvars = nvariables(equations) + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) + x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, + n_elements) + y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, + n_elements) + + # extract indices of local face nodes for wireframe plotting + Fmask = get_face_node_indices(r, s, dg) + plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; + nvisnodes = nvisnodes) + + # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. + # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size + # (Number of face plotting nodes) x (Number of faces). 
+ function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) + num_reference_faces = 2 * ndims(mesh) + xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) + return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) + end + function reshape_and_interpolate(x) + plotting_interp_matrix1D * + face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) + end + xfp, yfp = map(reshape_and_interpolate, (x, y)) + ufp = StructArray{SVector{nvars, uEltype}}(map(reshape_and_interpolate, + StructArrays.components(u))) - num_face_points = N+1 - num_faces_total = num_faces(rd.element_type) * md.num_elements - xf, yf, uf = map(x->reshape(view(x, Fmask, :), num_face_points, num_faces_total), (md.xyz..., u.data)) + return xfp, yfp, ufp +end - num_face_plotting_points = size(Vp1D, 1) - x_mesh, y_mesh = ntuple(_->zeros(num_face_plotting_points, num_faces_total), 2) - u_mesh = similar(u.data, (num_face_plotting_points, num_faces_total)) - for f in 1:num_faces_total - mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) - mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) - mul!(view(u_mesh, :, f), Vp1D, view(uf, :, f)) - end +function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGSEM, cache; + nvisnodes = 2 * nnodes(dg)) + + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) + + # extract node coordinates + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) + x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, + n_elements) + y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, + n_elements) + + # extract indices of local face nodes for wireframe plotting + Fmask = get_face_node_indices(r, s, dg) + plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; + nvisnodes = nvisnodes) + + # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. + # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size + # (Number of face plotting nodes) x (Number of faces). 
+ function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) + num_reference_faces = 2 * ndims(mesh) + xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) + return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) + end + function reshape_and_interpolate(x) + plotting_interp_matrix1D * + face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) + end + xfp, yfp, ufp = map(reshape_and_interpolate, (x, y, u.data)) - return x_mesh, y_mesh, u_mesh + return xfp, yfp, ufp end +function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGMulti, cache; + nvisnodes = 2 * nnodes(dg)) + @unpack md = mesh + rd = dg.basis + + # Construct 1D plotting interpolation matrix `Vp1D` for a single face + @unpack N, Fmask = rd + vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, StartUpDG.nodes(Line(), N)) + rplot = LinRange(-1, 1, nvisnodes) + Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D + + num_face_points = N + 1 + num_faces_total = num_faces(rd.element_type) * md.num_elements + xf, yf, uf = map(x -> reshape(view(x, Fmask, :), num_face_points, num_faces_total), + (md.xyz..., u.data)) + + num_face_plotting_points = size(Vp1D, 1) + x_mesh, y_mesh = ntuple(_ -> zeros(num_face_plotting_points, num_faces_total), 2) + u_mesh = similar(u.data, (num_face_plotting_points, num_faces_total)) + for f in 1:num_faces_total + mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) + mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) + mul!(view(u_mesh, :, f), Vp1D, view(uf, :, f)) + end + return x_mesh, y_mesh, u_mesh +end # These methods are used internally to set the default value of the solution variables: # - If a `cons2prim` for the given `equations` exists, use it # - Otherwise, use `cons2cons`, which is defined for all systems of equations digest_solution_variables(equations, solution_variables) = solution_variables function digest_solution_variables(equations, solution_variables::Nothing) - if hasmethod(cons2prim, Tuple{AbstractVector, typeof(equations)}) - return cons2prim - else - return cons2cons - end + if hasmethod(cons2prim, Tuple{AbstractVector, typeof(equations)}) + return cons2prim + else + return cons2cons + end end - """ adapt_to_mesh_level!(u_ode, semi, level) adapt_to_mesh_level!(sol::Trixi.TrixiODESolution, level) @@ -240,21 +264,23 @@ Like [`adapt_to_mesh_level`](@ref), but modifies the solution and parts of the semidiscretization (mesh and caches) in place. 
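
For example (an illustrative sketch, not a prescribed workflow), the final solution of a
completed simulation could be refined in place to a uniform refinement level of 4 via

    adapt_to_mesh_level!(sol, 4)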
""" function adapt_to_mesh_level!(u_ode, semi, level) - # Create AMR callback with controller that refines everything towards a single level - amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable=first), base_level=level) - amr_callback = AMRCallback(semi, amr_controller, interval=0) + # Create AMR callback with controller that refines everything towards a single level + amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first), + base_level = level) + amr_callback = AMRCallback(semi, amr_controller, interval = 0) - # Adapt mesh until it does not change anymore - has_changed = amr_callback.affect!(u_ode, semi, 0.0, 0) - while has_changed + # Adapt mesh until it does not change anymore has_changed = amr_callback.affect!(u_ode, semi, 0.0, 0) - end + while has_changed + has_changed = amr_callback.affect!(u_ode, semi, 0.0, 0) + end - return u_ode, semi + return u_ode, semi end -adapt_to_mesh_level!(sol::TrixiODESolution, level) = adapt_to_mesh_level!(sol.u[end], sol.prob.p, level) - +function adapt_to_mesh_level!(sol::TrixiODESolution, level) + adapt_to_mesh_level!(sol.u[end], sol.prob.p, level) +end """ adapt_to_mesh_level(u_ode, semi, level) @@ -270,15 +296,16 @@ extracted as needed. See also: [`adapt_to_mesh_level!`](@ref) """ function adapt_to_mesh_level(u_ode, semi, level) - # Create new semidiscretization with copy of the current mesh - mesh, _, _, _ = mesh_equations_solver_cache(semi) - new_semi = remake(semi, mesh=deepcopy(mesh)) + # Create new semidiscretization with copy of the current mesh + mesh, _, _, _ = mesh_equations_solver_cache(semi) + new_semi = remake(semi, mesh = deepcopy(mesh)) - return adapt_to_mesh_level!(deepcopy(u_ode), new_semi, level) + return adapt_to_mesh_level!(deepcopy(u_ode), new_semi, level) end -adapt_to_mesh_level(sol::TrixiODESolution, level) = adapt_to_mesh_level(sol.u[end], sol.prob.p, level) - +function adapt_to_mesh_level(sol::TrixiODESolution, level) + adapt_to_mesh_level(sol.u[end], sol.prob.p, level) +end # Extract data from a 2D/3D DG solution and prepare it for visualization as a heatmap/contour plot. # @@ -291,69 +318,74 @@ adapt_to_mesh_level(sol::TrixiODESolution, level) = adapt_to_mesh_level(sol.u[en # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. 
-function get_data_2d(center_level_0, length_level_0, leaf_cells, coordinates, levels, ndims, - unstructured_data, n_nodes, - grid_lines=false, max_supported_level=11, nvisnodes=nothing, - slice=:xy, point=(0.0, 0.0, 0.0)) - # Determine resolution for data interpolation - max_level = maximum(levels) - if max_level > max_supported_level - error("Maximum refinement level $max_level is higher than " * - "maximum supported level $max_supported_level") - end - max_available_nodes_per_finest_element = 2^(max_supported_level - max_level) - if nvisnodes === nothing - max_nvisnodes = 2 * n_nodes - elseif nvisnodes == 0 - max_nvisnodes = n_nodes - else - max_nvisnodes = nvisnodes - end - nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes) - resolution = nvisnodes_at_max_level * 2^max_level - nvisnodes_per_level = [2^(max_level - level)*nvisnodes_at_max_level for level in 0:max_level] - # nvisnodes_per_level is an array (accessed by "level + 1" to accommodate - # level-0-cell) that contains the number of visualization nodes for any - # refinement level to visualize on an equidistant grid - - if ndims == 3 - (unstructured_data, coordinates, levels, +function get_data_2d(center_level_0, length_level_0, leaf_cells, coordinates, levels, + ndims, unstructured_data, n_nodes, + grid_lines = false, max_supported_level = 11, nvisnodes = nothing, + slice = :xy, point = (0.0, 0.0, 0.0)) + # Determine resolution for data interpolation + max_level = maximum(levels) + if max_level > max_supported_level + error("Maximum refinement level $max_level is higher than " * + "maximum supported level $max_supported_level") + end + max_available_nodes_per_finest_element = 2^(max_supported_level - max_level) + if nvisnodes === nothing + max_nvisnodes = 2 * n_nodes + elseif nvisnodes == 0 + max_nvisnodes = n_nodes + else + max_nvisnodes = nvisnodes + end + nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes) + resolution = nvisnodes_at_max_level * 2^max_level + nvisnodes_per_level = [2^(max_level - level) * nvisnodes_at_max_level + for level in 0:max_level] + # nvisnodes_per_level is an array (accessed by "level + 1" to accommodate + # level-0-cell) that contains the number of visualization nodes for any + # refinement level to visualize on an equidistant grid + + if ndims == 3 + (unstructured_data, coordinates, levels, center_level_0) = unstructured_3d_to_2d(unstructured_data, - coordinates, levels, length_level_0, center_level_0, slice, - point) - end - - # Normalize element coordinates: move center to (0, 0) and domain size to [-1, 1]² - n_elements = length(levels) - normalized_coordinates = similar(coordinates) - for element_id in 1:n_elements - @views normalized_coordinates[:, element_id] .= ( - (coordinates[:, element_id] .- center_level_0) ./ (length_level_0 / 2 )) - end - - # Interpolate unstructured DG data to structured data - (structured_data = - unstructured2structured(unstructured_data, normalized_coordinates, - levels, resolution, nvisnodes_per_level)) - - # Interpolate cell-centered values to node-centered values - node_centered_data = cell2node(structured_data) - - # Determine axis coordinates for contour plot - xs = collect(range(-1, 1, length=resolution+1)) .* length_level_0/2 .+ center_level_0[1] - ys = collect(range(-1, 1, length=resolution+1)) .* length_level_0/2 .+ center_level_0[2] - - # Determine element vertices to plot grid lines - if grid_lines - mesh_vertices_x, mesh_vertices_y = calc_vertices(coordinates, levels, length_level_0) - else 
-    mesh_vertices_x = Vector{Float64}(undef, 0)
-    mesh_vertices_y = Vector{Float64}(undef, 0)
-  end
-
-  return xs, ys, node_centered_data, mesh_vertices_x, mesh_vertices_y
-end
+                                               coordinates, levels, length_level_0,
+                                               center_level_0, slice,
+                                               point)
+    end
+
+    # Normalize element coordinates: move center to (0, 0) and domain size to [-1, 1]²
+    n_elements = length(levels)
+    normalized_coordinates = similar(coordinates)
+    for element_id in 1:n_elements
+        @views normalized_coordinates[:, element_id] .= ((coordinates[:, element_id] .-
+                                                          center_level_0) ./
+                                                         (length_level_0 / 2))
+    end
+
+    # Interpolate unstructured DG data to structured data
+    (structured_data = unstructured2structured(unstructured_data,
+                                               normalized_coordinates,
+                                               levels, resolution, nvisnodes_per_level))
+
+    # Interpolate cell-centered values to node-centered values
+    node_centered_data = cell2node(structured_data)

+    # Determine axis coordinates for contour plot
+    xs = collect(range(-1, 1, length = resolution + 1)) .* length_level_0 / 2 .+
+         center_level_0[1]
+    ys = collect(range(-1, 1, length = resolution + 1)) .* length_level_0 / 2 .+
+         center_level_0[2]
+
+    # Determine element vertices to plot grid lines
+    if grid_lines
+        mesh_vertices_x, mesh_vertices_y = calc_vertices(coordinates, levels,
+                                                         length_level_0)
+    else
+        mesh_vertices_x = Vector{Float64}(undef, 0)
+        mesh_vertices_y = Vector{Float64}(undef, 0)
+    end
+
+    return xs, ys, node_centered_data, mesh_vertices_x, mesh_vertices_y
+end

# Extract data from a 1D DG solution and prepare it for visualization as a line plot.
# This returns a tuple with
#
@@ -363,43 +395,49 @@ end
# Note: This is a low-level function that is not considered as part of Trixi's interface and may
# thus be changed in future releases.
function get_data_1d(original_nodes, unstructured_data, nvisnodes)
-  # Get the dimensions of u; where n_vars is the number of variables, n_nodes the number of nodal values per element and n_elements the total number of elements.
-  n_nodes, n_elements, n_vars = size(unstructured_data)
-
-  # Set the amount of nodes visualized according to nvisnodes.
-  if nvisnodes === nothing
-    max_nvisnodes = 2 * n_nodes
-  elseif nvisnodes == 0
-    max_nvisnodes = n_nodes
-  else
-    @assert nvisnodes >= 2 "nvisnodes must be zero or >= 2"
-    max_nvisnodes = nvisnodes
-  end
-
-  interpolated_nodes = Array{eltype(original_nodes), 2}(undef, max_nvisnodes, n_elements)
-  interpolated_data = Array{eltype(unstructured_data), 3}(undef, max_nvisnodes, n_elements, n_vars)
-
-  for j in 1:n_elements
-    # Interpolate on an equidistant grid.
-    interpolated_nodes[:, j] .= range(original_nodes[1,1,j], original_nodes[1,end,j], length = max_nvisnodes)
-  end
-
-  nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes)
-  nodes_out = collect(range(-1, 1, length = max_nvisnodes))
-
-  # Calculate vandermonde matrix for interpolation.
-  vandermonde = polynomial_interpolation_matrix(nodes_in, nodes_out)
-
-  # Iterate over all variables.
-  for v in 1:n_vars
-    # Interpolate data for each element.
-    for element in 1:n_elements
-      multiply_scalar_dimensionwise!(@view(interpolated_data[:, element, v]),
-                                     vandermonde, @view(unstructured_data[:, element, v]))
+    # Get the dimensions of u, where n_vars is the number of variables, n_nodes the number of nodal values per element and n_elements the total number of elements.
+    n_nodes, n_elements, n_vars = size(unstructured_data)
+
+    # Set the number of nodes visualized according to nvisnodes.
+ if nvisnodes === nothing + max_nvisnodes = 2 * n_nodes + elseif nvisnodes == 0 + max_nvisnodes = n_nodes + else + @assert nvisnodes>=2 "nvisnodes must be zero or >= 2" + max_nvisnodes = nvisnodes + end + + interpolated_nodes = Array{eltype(original_nodes), 2}(undef, max_nvisnodes, + n_elements) + interpolated_data = Array{eltype(unstructured_data), 3}(undef, max_nvisnodes, + n_elements, n_vars) + + for j in 1:n_elements + # Interpolate on an equidistant grid. + interpolated_nodes[:, j] .= range(original_nodes[1, 1, j], + original_nodes[1, end, j], + length = max_nvisnodes) + end + + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes) + nodes_out = collect(range(-1, 1, length = max_nvisnodes)) + + # Calculate vandermonde matrix for interpolation. + vandermonde = polynomial_interpolation_matrix(nodes_in, nodes_out) + + # Iterate over all variables. + for v in 1:n_vars + # Interpolate data for each element. + for element in 1:n_elements + multiply_scalar_dimensionwise!(@view(interpolated_data[:, element, v]), + vandermonde, + @view(unstructured_data[:, element, v])) + end end - end - # Return results after data is reshaped - return vec(interpolated_nodes), reshape(interpolated_data, :, n_vars), vcat(original_nodes[1, 1, :], original_nodes[1, end, end]) + # Return results after data is reshaped + return vec(interpolated_nodes), reshape(interpolated_data, :, n_vars), + vcat(original_nodes[1, 1, :], original_nodes[1, end, end]) end # Change order of dimensions (variables are now last) and convert data to `solution_variables` @@ -407,40 +445,41 @@ end # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function get_unstructured_data(u, solution_variables, mesh, equations, solver, cache) + if solution_variables === cons2cons + raw_data = u + n_vars = size(raw_data, 1) + else + # FIXME: Remove this comment once the implementation following it has been verified + # Reinterpret the solution array as an array of conservative variables, + # compute the solution variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + # raw_data = Array(reinterpret(eltype(u), + # solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), + # Ref(equations)))) + # n_vars = size(raw_data, 1) + n_vars_in = nvariables(equations) + n_vars = length(solution_variables(get_node_vars(u, equations, solver), + equations)) + raw_data = Array{eltype(u)}(undef, n_vars, Base.tail(size(u))...) + reshaped_u = reshape(u, n_vars_in, :) + reshaped_r = reshape(raw_data, n_vars, :) + for idx in axes(reshaped_u, 2) + reshaped_r[:, idx] = solution_variables(get_node_vars(reshaped_u, equations, + solver, idx), + equations) + end + end - if solution_variables === cons2cons - raw_data = u - n_vars = size(raw_data, 1) - else - # FIXME: Remove this comment once the implementation following it has been verified - # Reinterpret the solution array as an array of conservative variables, - # compute the solution variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - # raw_data = Array(reinterpret(eltype(u), - # solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), - # Ref(equations)))) - # n_vars = size(raw_data, 1) - n_vars_in = nvariables(equations) - n_vars = length(solution_variables(get_node_vars(u, equations, solver), equations)) - raw_data = Array{eltype(u)}(undef, n_vars, Base.tail(size(u))...) 
- reshaped_u = reshape(u, n_vars_in, :) - reshaped_r = reshape(raw_data, n_vars, :) - for idx in axes(reshaped_u, 2) - reshaped_r[:, idx] = solution_variables(get_node_vars(reshaped_u, equations, solver, idx), equations) - end - end - - unstructured_data = Array{eltype(raw_data)}(undef, - ntuple((d) -> nnodes(solver), ndims(equations))..., - nelements(solver, cache), n_vars) - for variable in 1:n_vars - @views unstructured_data[.., :, variable] .= raw_data[variable, .., :] - end - - return unstructured_data -end - + unstructured_data = Array{eltype(raw_data)}(undef, + ntuple((d) -> nnodes(solver), + ndims(equations))..., + nelements(solver, cache), n_vars) + for variable in 1:n_vars + @views unstructured_data[.., :, variable] .= raw_data[variable, .., :] + end + return unstructured_data +end # Convert cell-centered values to node-centered values by averaging over all # four neighbors and making use of the periodicity of the solution @@ -448,53 +487,51 @@ end # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function cell2node(cell_centered_data) - # Create temporary data structure to make the averaging algorithm as simple - # as possible (by using a ghost layer) - tmp = similar(first(cell_centered_data), size(first(cell_centered_data)) .+ (2, 2)) - - # Create output data structure - resolution_in, _ = size(first(cell_centered_data)) - resolution_out = resolution_in + 1 - node_centered_data = [Matrix{Float64}(undef, resolution_out, resolution_out) - for _ in 1:length(cell_centered_data)] - - - for (cell_data, node_data) in zip(cell_centered_data, node_centered_data) - # Fill center with original data - tmp[2:end-1, 2:end-1] .= cell_data - - # Fill sides with opposite data (periodic domain) - # x-direction - tmp[1, 2:end-1] .= cell_data[end, :] - tmp[end, 2:end-1] .= cell_data[1, :] - # y-direction - tmp[2:end-1, 1, ] .= cell_data[:, end] - tmp[2:end-1, end] .= cell_data[:, 1, ] - # Corners - tmp[1, 1, ] = cell_data[end, end] - tmp[end, 1, ] = cell_data[1, end] - tmp[1, end] = cell_data[end, 1, ] - tmp[end, end] = cell_data[1, 1, ] - - # Obtain node-centered value by averaging over neighboring cell-centered values - for j in 1:resolution_out - for i in 1:resolution_out - node_data[i, j] = (tmp[i, j, ] + - tmp[i+1, j, ] + - tmp[i, j+1] + - tmp[i+1, j+1]) / 4 - end - end - end - - # Transpose - for (index, data) in enumerate(node_centered_data) - node_centered_data[index] = permutedims(data) - end - - return node_centered_data -end + # Create temporary data structure to make the averaging algorithm as simple + # as possible (by using a ghost layer) + tmp = similar(first(cell_centered_data), size(first(cell_centered_data)) .+ (2, 2)) + + # Create output data structure + resolution_in, _ = size(first(cell_centered_data)) + resolution_out = resolution_in + 1 + node_centered_data = [Matrix{Float64}(undef, resolution_out, resolution_out) + for _ in 1:length(cell_centered_data)] + + for (cell_data, node_data) in zip(cell_centered_data, node_centered_data) + # Fill center with original data + tmp[2:(end - 1), 2:(end - 1)] .= cell_data + + # Fill sides with opposite data (periodic domain) + # x-direction + tmp[1, 2:(end - 1)] .= cell_data[end, :] + tmp[end, 2:(end - 1)] .= cell_data[1, :] + # y-direction + tmp[2:(end - 1), 1] .= cell_data[:, end] + tmp[2:(end - 1), end] .= cell_data[:, 1] + # Corners + tmp[1, 1] = cell_data[end, end] + tmp[end, 1] = cell_data[1, end] + tmp[1, end] = cell_data[end, 1] + tmp[end, 
end] = cell_data[1, 1] + + # Obtain node-centered value by averaging over neighboring cell-centered values + for j in 1:resolution_out + for i in 1:resolution_out + node_data[i, j] = (tmp[i, j] + + tmp[i + 1, j] + + tmp[i, j + 1] + + tmp[i + 1, j + 1]) / 4 + end + end + end + + # Transpose + for (index, data) in enumerate(node_centered_data) + node_centered_data[index] = permutedims(data) + end + return node_centered_data +end # Convert 3d unstructured data to 2d data. # Additional to the new unstructured data updated coordinates, levels and @@ -505,572 +542,627 @@ end function unstructured_3d_to_2d(unstructured_data, coordinates, levels, length_level_0, center_level_0, slice, point) - if slice === :yz - slice_dimension = 1 - other_dimensions = [2, 3] - elseif slice === :xz - slice_dimension = 2 - other_dimensions = [1, 3] - elseif slice === :xy - slice_dimension = 3 - other_dimensions = [1, 2] - else - error("illegal dimension '$slice', supported dimensions are :yz, :xz, and :xy") - end - - # Limits of domain in slice dimension - lower_limit = center_level_0[slice_dimension] - length_level_0 / 2 - upper_limit = center_level_0[slice_dimension] + length_level_0 / 2 - - @assert length(point) >= 3 "Point must be three-dimensional." - if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit - error(string("Slice plane is outside of domain.", - " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) - end - - # Extract data shape information - n_nodes_in, _, _, n_elements, n_variables = size(unstructured_data) - - # Get node coordinates for DG locations on reference element - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) - - # New unstructured data has one dimension less. - # The redundant element ids are removed later. - @views new_unstructured_data = similar(unstructured_data[1, ..]) - - # Declare new empty arrays to fill in new coordinates and levels - new_coordinates = Array{Float64}(undef, 2, n_elements) - new_levels = Array{eltype(levels)}(undef, n_elements) - - # Counter for new element ids - new_id = 0 - - # Save vandermonde matrices in a Dict to prevent redundant generation - vandermonde_to_2d = Dict() - - # Permute dimensions such that the slice dimension is always the - # third dimension of the array. Below we can always interpolate in the - # third dimension. - if slice === :yz - unstructured_data = permutedims(unstructured_data, [2, 3, 1, 4, 5]) - elseif slice === :xz - unstructured_data = permutedims(unstructured_data, [1, 3, 2, 4, 5]) - end - - for element_id in 1:n_elements - # Distance from center to border of this element (half the length) - element_length = length_level_0 / 2^levels[element_id] - min_coordinate = coordinates[:, element_id] .- element_length / 2 - max_coordinate = coordinates[:, element_id] .+ element_length / 2 - - # Check if slice plane and current element intersect. - # The first check uses a "greater but not equal" to only match one cell if the - # slice plane lies between two cells. - # The second check is needed if the slice plane is at the upper border of - # the domain due to this. 
- if !((min_coordinate[slice_dimension] <= point[slice_dimension] && - max_coordinate[slice_dimension] > point[slice_dimension]) || - (point[slice_dimension] == upper_limit && - max_coordinate[slice_dimension] == upper_limit)) - # Continue for loop if they don't intersect - continue - end - - # This element is of interest - new_id += 1 - - # Add element to new coordinates and levels - new_coordinates[:, new_id] = coordinates[other_dimensions, element_id] - new_levels[new_id] = levels[element_id] - - # Construct vandermonde matrix (or load from Dict if possible) - normalized_intercept = - (point[slice_dimension] - min_coordinate[slice_dimension]) / - element_length * 2 - 1 - - if haskey(vandermonde_to_2d, normalized_intercept) - vandermonde = vandermonde_to_2d[normalized_intercept] + if slice === :yz + slice_dimension = 1 + other_dimensions = [2, 3] + elseif slice === :xz + slice_dimension = 2 + other_dimensions = [1, 3] + elseif slice === :xy + slice_dimension = 3 + other_dimensions = [1, 2] else - # Generate vandermonde matrix to interpolate values at nodes_in to one value - vandermonde = polynomial_interpolation_matrix(nodes_in, [normalized_intercept]) - vandermonde_to_2d[normalized_intercept] = vandermonde + error("illegal dimension '$slice', supported dimensions are :yz, :xz, and :xy") + end + + # Limits of domain in slice dimension + lower_limit = center_level_0[slice_dimension] - length_level_0 / 2 + upper_limit = center_level_0[slice_dimension] + length_level_0 / 2 + + @assert length(point)>=3 "Point must be three-dimensional." + if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit + error(string("Slice plane is outside of domain.", + " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) + end + + # Extract data shape information + n_nodes_in, _, _, n_elements, n_variables = size(unstructured_data) + + # Get node coordinates for DG locations on reference element + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) + + # New unstructured data has one dimension less. + # The redundant element ids are removed later. + @views new_unstructured_data = similar(unstructured_data[1, ..]) + + # Declare new empty arrays to fill in new coordinates and levels + new_coordinates = Array{Float64}(undef, 2, n_elements) + new_levels = Array{eltype(levels)}(undef, n_elements) + + # Counter for new element ids + new_id = 0 + + # Save vandermonde matrices in a Dict to prevent redundant generation + vandermonde_to_2d = Dict() + + # Permute dimensions such that the slice dimension is always the + # third dimension of the array. Below we can always interpolate in the + # third dimension. + if slice === :yz + unstructured_data = permutedims(unstructured_data, [2, 3, 1, 4, 5]) + elseif slice === :xz + unstructured_data = permutedims(unstructured_data, [1, 3, 2, 4, 5]) end - # 1D interpolation to specified slice plane - # We permuted the dimensions above such that now the dimension in which - # we will interpolate is always the third one. - for i in 1:n_nodes_in - for ii in 1:n_nodes_in - # Interpolate in the third dimension - data = unstructured_data[i, ii, :, element_id, :] + for element_id in 1:n_elements + # Distance from center to border of this element (half the length) + element_length = length_level_0 / 2^levels[element_id] + min_coordinate = coordinates[:, element_id] .- element_length / 2 + max_coordinate = coordinates[:, element_id] .+ element_length / 2 + + # Check if slice plane and current element intersect. 
+ # The first check uses a "greater but not equal" to only match one cell if the + # slice plane lies between two cells. + # The second check is needed if the slice plane is at the upper border of + # the domain due to this. + if !((min_coordinate[slice_dimension] <= point[slice_dimension] && + max_coordinate[slice_dimension] > point[slice_dimension]) || + (point[slice_dimension] == upper_limit && + max_coordinate[slice_dimension] == upper_limit)) + # Continue for loop if they don't intersect + continue + end + + # This element is of interest + new_id += 1 + + # Add element to new coordinates and levels + new_coordinates[:, new_id] = coordinates[other_dimensions, element_id] + new_levels[new_id] = levels[element_id] + + # Construct vandermonde matrix (or load from Dict if possible) + normalized_intercept = (point[slice_dimension] - + min_coordinate[slice_dimension]) / + element_length * 2 - 1 + + if haskey(vandermonde_to_2d, normalized_intercept) + vandermonde = vandermonde_to_2d[normalized_intercept] + else + # Generate vandermonde matrix to interpolate values at nodes_in to one value + vandermonde = polynomial_interpolation_matrix(nodes_in, + [normalized_intercept]) + vandermonde_to_2d[normalized_intercept] = vandermonde + end - value = multiply_dimensionwise(vandermonde, permutedims(data)) - new_unstructured_data[i, ii, new_id, :] = value[:, 1] - end + # 1D interpolation to specified slice plane + # We permuted the dimensions above such that now the dimension in which + # we will interpolate is always the third one. + for i in 1:n_nodes_in + for ii in 1:n_nodes_in + # Interpolate in the third dimension + data = unstructured_data[i, ii, :, element_id, :] + + value = multiply_dimensionwise(vandermonde, permutedims(data)) + new_unstructured_data[i, ii, new_id, :] = value[:, 1] + end + end end - end - # Remove redundant element ids - unstructured_data = new_unstructured_data[:, :, 1:new_id, :] - new_coordinates = new_coordinates[:, 1:new_id] - new_levels = new_levels[1:new_id] + # Remove redundant element ids + unstructured_data = new_unstructured_data[:, :, 1:new_id, :] + new_coordinates = new_coordinates[:, 1:new_id] + new_levels = new_levels[1:new_id] - center_level_0 = center_level_0[other_dimensions] + center_level_0 = center_level_0[other_dimensions] - return unstructured_data, new_coordinates, new_levels, center_level_0 + return unstructured_data, new_coordinates, new_levels, center_level_0 end # Convert 2d unstructured data to 1d slice and interpolate them. -function unstructured_2d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, point) - - if slice === :x - slice_dimension = 2 - other_dimension = 1 - elseif slice === :y - slice_dimension = 1 - other_dimension = 2 - else - error("illegal dimension '$slice', supported dimensions are :x and :y") - end - - # Set up data structures to store new 1D data. - @views new_unstructured_data = similar(unstructured_data[1, ..]) - @views new_nodes = similar(original_nodes[1, 1, ..]) - - n_nodes_in, _, n_elements, n_variables = size(unstructured_data) - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) - - # Test if point lies in the domain. - lower_limit = original_nodes[1, 1, 1, 1] - upper_limit = original_nodes[1, n_nodes_in, n_nodes_in, n_elements] - - @assert length(point) >= 2 "Point must be two-dimensional." - if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit - error(string("Slice axis is outside of domain. 
", - " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) - end - - # Count the amount of new elements. - new_id = 0 - - # Permute dimensions so that the slice dimension is always in the correct place for later use. - if slice === :y - original_nodes = permutedims(original_nodes, [1, 3, 2, 4]) - unstructured_data = permutedims(unstructured_data, [2, 1, 3, 4]) - end - - # Iterate over all elements to find the ones that lie on the slice axis. - for element_id in 1:n_elements - min_coordinate = original_nodes[:, 1, 1, element_id] - max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, element_id] - element_length = max_coordinate - min_coordinate - - # Test if the element is on the slice axis. If not just continue with the next element. - if !((min_coordinate[slice_dimension] <= point[slice_dimension] && - max_coordinate[slice_dimension] > point[slice_dimension]) || - (point[slice_dimension] == upper_limit && max_coordinate[slice_dimension] == upper_limit)) - - continue - end - - new_id += 1 - - # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. - normalized_intercept = - (point[slice_dimension] - min_coordinate[slice_dimension]) / - element_length[1] * 2 - 1 - vandermonde = polynomial_interpolation_matrix(nodes_in, normalized_intercept) - - # Interpolate to each node of new 1D element. - for v in 1:n_variables - for node in 1:n_nodes_in - new_unstructured_data[node, new_id, v] = (vandermonde*unstructured_data[node, :, element_id, v])[1] - end +function unstructured_2d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, + point) + if slice === :x + slice_dimension = 2 + other_dimension = 1 + elseif slice === :y + slice_dimension = 1 + other_dimension = 2 + else + error("illegal dimension '$slice', supported dimensions are :x and :y") end - new_nodes[:, new_id] = original_nodes[other_dimension, :, 1, element_id] - end + # Set up data structures to store new 1D data. + @views new_unstructured_data = similar(unstructured_data[1, ..]) + @views new_nodes = similar(original_nodes[1, 1, ..]) - return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), new_unstructured_data[:, 1:new_id, :], nvisnodes) -end + n_nodes_in, _, n_elements, n_variables = size(unstructured_data) + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) -# Calculate the arc length of a curve given by ndims x npoints point coordinates (piece-wise linear approximation) -function calc_arc_length(coordinates) - n_points = size(coordinates)[2] - arc_length = zeros(n_points) - for i in 1:n_points-1 - arc_length[i+1] = arc_length[i] + sqrt(sum((coordinates[:,i]-coordinates[:,i+1]).^2)) - end - return arc_length -end + # Test if point lies in the domain. + lower_limit = original_nodes[1, 1, 1, 1] + upper_limit = original_nodes[1, n_nodes_in, n_nodes_in, n_elements] -# Convert 2d unstructured data to 1d data at given curve. -function unstructured_2d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache) + @assert length(point)>=2 "Point must be two-dimensional." + if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit + error(string("Slice axis is outside of domain. ", + " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) + end - n_points_curve = size(curve)[2] - n_nodes, _, n_elements, n_variables = size(unstructured_data) - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes) + # Count the amount of new elements. 
+ new_id = 0 - # Check if input is correct. - min = original_nodes[:, 1, 1, 1] - max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_elements] - @assert size(curve) == (2, size(curve)[2]) "Coordinates along curve must be 2xn dimensional." - for element in 1:n_points_curve - @assert (prod(vcat(curve[:, n_points_curve] .>= min, curve[:, n_points_curve] - .<= max))) "Some coordinates from `curve` are outside of the domain.." - end + # Permute dimensions so that the slice dimension is always in the correct place for later use. + if slice === :y + original_nodes = permutedims(original_nodes, [1, 3, 2, 4]) + unstructured_data = permutedims(unstructured_data, [2, 1, 3, 4]) + end - # Set nodes according to the length of the curve. - arc_length = calc_arc_length(curve) + # Iterate over all elements to find the ones that lie on the slice axis. + for element_id in 1:n_elements + min_coordinate = original_nodes[:, 1, 1, element_id] + max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, element_id] + element_length = max_coordinate - min_coordinate + + # Test if the element is on the slice axis. If not just continue with the next element. + if !((min_coordinate[slice_dimension] <= point[slice_dimension] && + max_coordinate[slice_dimension] > point[slice_dimension]) || + (point[slice_dimension] == upper_limit && + max_coordinate[slice_dimension] == upper_limit)) + continue + end - # Setup data structures. - data_on_curve = Array{Float64}(undef, n_points_curve, n_variables) - temp_data = Array{Float64}(undef, n_nodes, n_points_curve, n_variables) + new_id += 1 + + # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. + normalized_intercept = (point[slice_dimension] - + min_coordinate[slice_dimension]) / + element_length[1] * 2 - 1 + vandermonde = polynomial_interpolation_matrix(nodes_in, normalized_intercept) + + # Interpolate to each node of new 1D element. + for v in 1:n_variables + for node in 1:n_nodes_in + new_unstructured_data[node, new_id, v] = (vandermonde * unstructured_data[node, + :, + element_id, + v])[1] + end + end - # For each coordinate find the corresponding element with its id. - element_ids = get_elements_by_coordinates(curve, mesh, solver, cache) + new_nodes[:, new_id] = original_nodes[other_dimension, :, 1, element_id] + end - # Iterate over all found elements. - for element in 1:n_points_curve + return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), + new_unstructured_data[:, 1:new_id, :], nvisnodes) +end - min_coordinate = original_nodes[:, 1, 1, element_ids[element]] - max_coordinate = original_nodes[:, n_nodes, n_nodes, element_ids[element]] - element_length = max_coordinate - min_coordinate +# Calculate the arc length of a curve given by ndims x npoints point coordinates (piece-wise linear approximation) +function calc_arc_length(coordinates) + n_points = size(coordinates)[2] + arc_length = zeros(n_points) + for i in 1:(n_points - 1) + arc_length[i + 1] = arc_length[i] + + sqrt(sum((coordinates[:, i] - coordinates[:, i + 1]) .^ 2)) + end + return arc_length +end - normalized_coordinates = (curve[:, element] - min_coordinate)/element_length[1]*2 .-1 +# Convert 2d unstructured data to 1d data at given curve. +function unstructured_2d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, + curve, mesh, solver, cache) + n_points_curve = size(curve)[2] + n_nodes, _, n_elements, n_variables = size(unstructured_data) + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes) + + # Check if input is correct. 
+ min = original_nodes[:, 1, 1, 1]
+ max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_elements]
+ @assert size(curve)==(2, size(curve)[2]) "Coordinates along curve must be 2xn dimensional."
+ for element in 1:n_points_curve
+ @assert (prod(vcat(curve[:, n_points_curve] .>= min,
+ curve[:, n_points_curve]
+ .<=
+ max))) "Some coordinates from `curve` are outside of the domain."
+ end
- # Interpolate to a single point in each element.
- vandermonde_x = polynomial_interpolation_matrix(nodes_in, normalized_coordinates[1])
- vandermonde_y = polynomial_interpolation_matrix(nodes_in, normalized_coordinates[2])
- for v in 1:n_variables
- for i in 1:n_nodes
- temp_data[i, element, v] = (vandermonde_y*unstructured_data[i, :, element_ids[element], v])[1]
- end
- data_on_curve[element, v] = (vandermonde_x*temp_data[:, element, v])[]
+ # Set nodes according to the length of the curve.
+ arc_length = calc_arc_length(curve)
+
+ # Setup data structures.
+ data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+ temp_data = Array{Float64}(undef, n_nodes, n_points_curve, n_variables)
+
+ # For each coordinate find the corresponding element with its id.
+ element_ids = get_elements_by_coordinates(curve, mesh, solver, cache)
+
+ # Iterate over all found elements.
+ for element in 1:n_points_curve
+ min_coordinate = original_nodes[:, 1, 1, element_ids[element]]
+ max_coordinate = original_nodes[:, n_nodes, n_nodes, element_ids[element]]
+ element_length = max_coordinate - min_coordinate
+
+ normalized_coordinates = (curve[:, element] - min_coordinate) /
+ element_length[1] * 2 .- 1
+
+ # Interpolate to a single point in each element.
+ vandermonde_x = polynomial_interpolation_matrix(nodes_in,
+ normalized_coordinates[1])
+ vandermonde_y = polynomial_interpolation_matrix(nodes_in,
+ normalized_coordinates[2])
+ for v in 1:n_variables
+ for i in 1:n_nodes
+ temp_data[i, element, v] = (vandermonde_y * unstructured_data[i, :,
+ element_ids[element],
+ v])[1]
+ end
+ data_on_curve[element, v] = (vandermonde_x * temp_data[:, element, v])[]
+ end
end
- end
- return arc_length, data_on_curve, nothing
+ return arc_length, data_on_curve, nothing
end
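Both curve-sampling paths parameterize their output by arc length. Since `calc_arc_length` above is just a cumulative sum of straight segment lengths, its behavior is easy to sanity-check on a curve of known length; a small sketch, assuming the internal helper defined above is in scope (e.g. qualified as `Trixi.calc_arc_length` in a REPL):

```julia
# A quarter circle sampled at 100 points: the piecewise-linear arc length
# should approach pi/2 from below as the sampling is refined.
coordinates = hcat([[cos(t), sin(t)] for t in range(0, pi / 2, length = 100)]...)
arc_length = calc_arc_length(coordinates)    # internal helper defined above
@assert arc_length[end] < pi / 2                        # chords underestimate arcs
@assert isapprox(arc_length[end], pi / 2; atol = 1e-3)  # but only slightly
```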
# Convert a PlotData2DTriangulate object to a 1d data along given curve.
function unstructured_2d_to_1d_curve(pd, input_curve, slice, point, nvisnodes)
- # If no curve is defined, create a axis curve.
- if input_curve === nothing
- input_curve = axis_curve(pd.x, pd.y, nothing, slice, point, nvisnodes)
- end
+ # If no curve is defined, create an axis curve.
+ if input_curve === nothing
+ input_curve = axis_curve(pd.x, pd.y, nothing, slice, point, nvisnodes)
+ end
- @assert size(input_curve, 1) == 2 "Input 'curve' must be 2xn dimensional."
+ @assert size(input_curve, 1)==2 "Input 'curve' must be 2xn dimensional."
- # For each coordinate find the corresponding triangle with its ids.
- ids_by_coordinates = get_ids_by_coordinates(input_curve, pd)
- found_coordinates = ids_by_coordinates[:, 1] .!= nothing
+ # For each coordinate find the corresponding triangle with its ids.
+ ids_by_coordinates = get_ids_by_coordinates(input_curve, pd)
+ found_coordinates = ids_by_coordinates[:, 1] .!= nothing
- @assert found_coordinates != zeros(size(input_curve, 2)) "No points of 'curve' are inside of the solutions domain."
+ @assert found_coordinates!=zeros(size(input_curve, 2)) "No points of 'curve' are inside of the solution's domain."
- # These hold the ids of the elements and triangles the points of the curve sit in.
- element_ids = @view ids_by_coordinates[found_coordinates, 1]
- triangle_ids = @view ids_by_coordinates[found_coordinates, 2]
+ # These hold the ids of the elements and triangles the points of the curve sit in.
+ element_ids = @view ids_by_coordinates[found_coordinates, 1]
+ triangle_ids = @view ids_by_coordinates[found_coordinates, 2]
- # Shorten the curve, so that it contains only point that were found.
- curve = @view input_curve[:, found_coordinates]
+ # Shorten the curve, so that it contains only points that were found.
+ curve = @view input_curve[:, found_coordinates]
- n_variables = length(pd.data[1, 1])
- n_points_curve = size(curve, 2)
+ n_variables = length(pd.data[1, 1])
+ n_points_curve = size(curve, 2)
- # Set nodes according to the length of the curve.
- arc_length = calc_arc_length(curve)
+ # Set nodes according to the length of the curve.
+ arc_length = calc_arc_length(curve)
- # Setup data structures.
- data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+ # Setup data structures.
+ data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
- # Iterate over all points on the curve.
- for point in 1:n_points_curve
- element = @view element_ids[point]
- triangle = @view pd.t[triangle_ids[point], :]
- for v in 1:n_variables
- # Get the x and y coordinates of the corners of given triangle.
- x_coordinates_triangle = SVector{3}(pd.x[triangle, element])
- y_coordinates_triangle = SVector{3}(pd.y[triangle, element])
+ # Iterate over all points on the curve.
+ for point in 1:n_points_curve
+ element = @view element_ids[point]
+ triangle = @view pd.t[triangle_ids[point], :]
+ for v in 1:n_variables
+ # Get the x and y coordinates of the corners of the given triangle.
+ x_coordinates_triangle = SVector{3}(pd.x[triangle, element])
+ y_coordinates_triangle = SVector{3}(pd.y[triangle, element])
- # Extract solutions values in corners of the triangle.
- values_triangle = SVector{3}(getindex.(view(pd.data, triangle, element), v))
+ # Extract solution values in the corners of the triangle.
+ values_triangle = SVector{3}(getindex.(view(pd.data, triangle, element), v))
- # Linear interpolation in each triangle to the points on the curve.
- data_on_curve[point, v] = triangle_interpolation(x_coordinates_triangle, y_coordinates_triangle, values_triangle, curve[:, point])
+ # Linear interpolation in each triangle to the points on the curve.
+ data_on_curve[point, v] = triangle_interpolation(x_coordinates_triangle,
+ y_coordinates_triangle,
+ values_triangle,
+ curve[:, point])
+ end
end
- end
- return arc_length, data_on_curve, nothing
+ return arc_length, data_on_curve, nothing
end
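The per-triangle interpolation above fits the unique affine function u(x, y) = c1*x + c2*y + c3 through the three corner values (via the small linear solve in `triangle_interpolation`) and evaluates it at the curve point. A worked toy example of that fit, with plain vectors standing in for the SVectors used in the source:

```julia
using LinearAlgebra

x_corners = [0.0, 1.0, 0.0]
y_corners = [0.0, 0.0, 1.0]
values = [1.0, 3.0, 2.0]              # corner values of u(x, y) = 1 + 2x + y

A = hcat(x_corners, y_corners, ones(3))
c = A \ values                        # recovers c == [2.0, 1.0, 1.0]
point = [0.25, 0.25]
u = c[1] * point[1] + c[2] * point[2] + c[3]
@assert isapprox(u, 1.75)             # 1 + 2*0.25 + 0.25
```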
# Convert 3d unstructured data to 1d data at given curve.
-function unstructured_3d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache)
-
- n_points_curve = size(curve)[2]
- n_nodes, _, _, n_elements, n_variables = size(unstructured_data)
- nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes)
-
- # Check if input is correct.
- min = original_nodes[:, 1, 1, 1, 1]
- max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes, n_elements]
- @assert size(curve) == (3, n_points_curve) "Coordinates along curve must be 3xn dimensional."
- for element in 1:n_points_curve
- @assert (prod(vcat(curve[:, n_points_curve] .>= min, curve[:, n_points_curve]
- .<= max))) "Some coordinates from `curve` are outside of the domain.."
- end
-
- # Set nodes according to the length of the curve.
- arc_length = calc_arc_length(curve)
-
- # Setup data structures.
- data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
- temp_data = Array{Float64}(undef, n_nodes, n_nodes+1, n_points_curve, n_variables)
-
- # For each coordinate find the corresponding element with its id.
- element_ids = get_elements_by_coordinates(curve, mesh, solver, cache)
-
- # Iterate over all found elements.
- for element in 1:n_points_curve
-
- min_coordinate = original_nodes[:, 1, 1, 1, element_ids[element]]
- max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes, element_ids[element]]
- element_length = max_coordinate - min_coordinate
-
- normalized_coordinates = (curve[:, element] - min_coordinate)/element_length[1]*2 .-1
+function unstructured_3d_to_1d_curve(original_nodes, unstructured_data, nvisnodes,
+ curve, mesh, solver, cache)
+ n_points_curve = size(curve)[2]
+ n_nodes, _, _, n_elements, n_variables = size(unstructured_data)
+ nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes)
+
+ # Check if input is correct.
+ min = original_nodes[:, 1, 1, 1, 1]
+ max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes, n_elements]
+ @assert size(curve)==(3, n_points_curve) "Coordinates along curve must be 3xn dimensional."
+ for element in 1:n_points_curve
+ @assert (prod(vcat(curve[:, n_points_curve] .>= min,
+ curve[:, n_points_curve]
+ .<=
+ max))) "Some coordinates from `curve` are outside of the domain."
+ end
- # Interpolate to a single point in each element.
- vandermonde_x = polynomial_interpolation_matrix(nodes_in, normalized_coordinates[1])
- vandermonde_y = polynomial_interpolation_matrix(nodes_in, normalized_coordinates[2])
- vandermonde_z = polynomial_interpolation_matrix(nodes_in, normalized_coordinates[3])
- for v in 1:n_variables
- for i in 1:n_nodes
- for ii in 1:n_nodes
- temp_data[i, ii, element, v] = (vandermonde_z*unstructured_data[i, ii, :, element_ids[element], v])[1]
+ # Set nodes according to the length of the curve.
+ arc_length = calc_arc_length(curve)
+
+ # Setup data structures.
+ data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+ temp_data = Array{Float64}(undef, n_nodes, n_nodes + 1, n_points_curve, n_variables)
+
+ # For each coordinate find the corresponding element with its id.
+ element_ids = get_elements_by_coordinates(curve, mesh, solver, cache)
+
+ # Iterate over all found elements.
+ for element in 1:n_points_curve
+ min_coordinate = original_nodes[:, 1, 1, 1, element_ids[element]]
+ max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes,
+ element_ids[element]]
+ element_length = max_coordinate - min_coordinate
+
+ normalized_coordinates = (curve[:, element] - min_coordinate) /
+ element_length[1] * 2 .- 1
+
+ # Interpolate to a single point in each element.
+ vandermonde_x = polynomial_interpolation_matrix(nodes_in,
+ normalized_coordinates[1])
+ vandermonde_y = polynomial_interpolation_matrix(nodes_in,
+ normalized_coordinates[2])
+ vandermonde_z = polynomial_interpolation_matrix(nodes_in,
+ normalized_coordinates[3])
+ for v in 1:n_variables
+ for i in 1:n_nodes
+ for ii in 1:n_nodes
+ temp_data[i, ii, element, v] = (vandermonde_z * unstructured_data[i,
+ ii,
+ :,
+ element_ids[element],
+ v])[1]
+ end
+ temp_data[i, n_nodes + 1, element, v] = (vandermonde_y * temp_data[i,
+ 1:n_nodes,
+ element,
+ v])[1]
+ end
+ data_on_curve[element, v] = (vandermonde_x * temp_data[:, n_nodes + 1,
+ element, v])[1]
end
- temp_data[i, n_nodes+1, element, v] = (vandermonde_y*temp_data[i, 1:n_nodes, element, v])[1]
- end
- data_on_curve[element, v] = (vandermonde_x*temp_data[:, n_nodes+1, element, v])[1]
end
- end
- return arc_length, data_on_curve, nothing
+ return arc_length, data_on_curve, nothing
end
# Convert 3d unstructured data from a general mesh to 1d data at given curve.
function unstructured_3d_to_1d_curve(nodes, data, curve, slice, point, nvisnodes)
- # If no curve is defined, create a axis curve.
- if curve === nothing
- curve = axis_curve(nodes[1,:,:,:,:], nodes[2,:,:,:,:], nodes[3,:,:,:,:], slice, point, nvisnodes)
- end
+ # If no curve is defined, create an axis curve.
+ if curve === nothing
+ curve = axis_curve(nodes[1, :, :, :, :], nodes[2, :, :, :, :],
+ nodes[3, :, :, :, :], slice, point, nvisnodes)
+ end
- # Set up data structure.
- n_points_curve = size(curve, 2)
- n_variables = size(data, 1)
- data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+ # Set up data structure.
+ n_points_curve = size(curve, 2)
+ n_variables = size(data, 1)
+ data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
- # Iterate over every point on the curve and determine the solutions value at given point.
- for i in 1:n_points_curve
- @views data_on_curve[i, :] .= get_value_at_point(curve[:,i], nodes, data)
- end
+ # Iterate over every point on the curve and determine the solution value at the given point.
+ for i in 1:n_points_curve
+ @views data_on_curve[i, :] .= get_value_at_point(curve[:, i], nodes, data)
+ end
- mesh_vertices_x = nothing
+ mesh_vertices_x = nothing
- return calc_arc_length(curve), data_on_curve, mesh_vertices_x
+ return calc_arc_length(curve), data_on_curve, mesh_vertices_x
end
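For general meshes, `get_value_at_point` below samples the solution by locating the node closest to the query point and then growing a set of four nearby nodes into an interpolation tetrahedron, rejecting degenerate candidates along the way. The rejection criterion for four points (in `is_valid_tetrahedron`, which follows) is a determinant test: with columns [x y z 1], det(A) is six times the signed tetrahedron volume, so coplanar corners give det(A) == 0. A small standalone check of that criterion (toy coordinates; `ones(4)` in place of the SVector used in the source):

```julia
using LinearAlgebra

coplanar = [0.0 1.0 0.0 1.0;   # x-coordinates of four points in the z = 0 plane
            0.0 0.0 1.0 1.0;   # y-coordinates
            0.0 0.0 0.0 0.0]   # z-coordinates
lifted = copy(coplanar)
lifted[3, 4] = 1.0             # move one point out of the plane

# Same construction as in is_valid_tetrahedron: det vanishes iff coplanar
volume_det(coords) = det(hcat(coords[1, :], coords[2, :], coords[3, :], ones(4)))

@assert isapprox(volume_det(coplanar), 0; atol = 1e-12)
@assert abs(volume_det(lifted)) > 1e-4
```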
# Check if the first 'amount'-many points can still form a valid tetrahedron.
-function is_valid_tetrahedron(amount, coordinates; tol=10^-4)
- a = coordinates[:,1]; b = coordinates[:,2]; c = coordinates[:,3]; d = coordinates[:,4];
- if amount == 2 # If two points are the same, then no tetrahedron can be formed.
- return !(isapprox(a, b; atol=tol))
- elseif amount == 3 # Check if three points are on the same line.
- return !on_the_same_line(a, b, c; tol=tol)
- elseif amount == 4 # Check if four points form a tetrahedron.
- A = hcat(coordinates[1, :], coordinates[2, :], coordinates[3, :], SVector(1, 1, 1, 1))
- return !isapprox(det(A), 0; atol=tol)
- else # With one point a tetrahedron can always be formed.
- return true
- end
+function is_valid_tetrahedron(amount, coordinates; tol = 10^-4)
+ a = coordinates[:, 1]
+ b = coordinates[:, 2]
+ c = coordinates[:, 3]
+ d = coordinates[:, 4]
+ if amount == 2 # If two points are the same, then no tetrahedron can be formed.
+ return !(isapprox(a, b; atol = tol))
+ elseif amount == 3 # Check if three points are on the same line.
+ return !on_the_same_line(a, b, c; tol = tol)
+ elseif amount == 4 # Check if four points form a tetrahedron.
+ A = hcat(coordinates[1, :], coordinates[2, :], coordinates[3, :],
+ SVector(1, 1, 1, 1))
+ return !isapprox(det(A), 0; atol = tol)
+ else # With one point a tetrahedron can always be formed.
+ return true
+ end
end
# Check if three given 3D-points are on the same line.
-function on_the_same_line(a, b, c; tol=10^-4)
- # Calculate the intersection of the a-b-axis at x=0.
- if b[1] == 0
- intersect_a_b = b
- else
- intersect_a_b = a - b.*(a[1]/b[1])
- end
- # Calculate the intersection of the a-c-axis at x=0.
- if c[1] == 0
- intersect_a_c = c
- else
- intersect_a_c = a - c.*(a[1]/c[1])
- end
- return isapprox(intersect_a_b, intersect_a_c; atol=tol)
+function on_the_same_line(a, b, c; tol = 10^-4)
+ # Calculate the intersection of the a-b-axis at x=0.
+ if b[1] == 0
+ intersect_a_b = b
+ else
+ intersect_a_b = a - b .* (a[1] / b[1])
+ end
+ # Calculate the intersection of the a-c-axis at x=0.
+ if c[1] == 0
+ intersect_a_c = c
+ else
+ intersect_a_c = a - c .* (a[1] / c[1])
+ end
+ return isapprox(intersect_a_b, intersect_a_c; atol = tol)
end
# Interpolate from four corners of a tetrahedron to a single point.
-function tetrahedron_interpolation(x_coordinates_in, y_coordinates_in, z_coordinates_in, values_in, coordinate_out)
- A = hcat(x_coordinates_in, y_coordinates_in, z_coordinates_in, SVector(1, 1, 1, 1))
- c = A \ values_in
- return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] + c[3] * coordinate_out[3] + c[4]
+function tetrahedron_interpolation(x_coordinates_in, y_coordinates_in, z_coordinates_in,
+ values_in, coordinate_out)
+ A = hcat(x_coordinates_in, y_coordinates_in, z_coordinates_in, SVector(1, 1, 1, 1))
+ c = A \ values_in
+ return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] +
+ c[3] * coordinate_out[3] + c[4]
end
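`tetrahedron_interpolation` generalizes the triangle case to 3D: it fits the affine function u(x, y, z) = c1*x + c2*y + c3*z + c4 through the four corner values and evaluates it at the query point, which reproduces any affine function exactly. A worked example with plain vectors in place of the SVectors used in the source:

```julia
using LinearAlgebra

x = [0.0, 1.0, 0.0, 0.0]
y = [0.0, 0.0, 1.0, 0.0]
z = [0.0, 0.0, 0.0, 1.0]
values = [1.0, 2.0, 3.0, 0.0]    # corner values of u = 1 + x + 2y - z

A = hcat(x, y, z, ones(4))
c = A \ values                   # recovers c == [1.0, 2.0, -1.0, 1.0]
point = [0.25, 0.25, 0.25]
u = c[1] * point[1] + c[2] * point[2] + c[3] * point[3] + c[4]
@assert isapprox(u, 1.5)         # 1 + 0.25 + 0.5 - 0.25
```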
# Calculate the distances from every entry in node to given point.
function distances_from_single_point(nodes, point)
- _, n_nodes, _, _, n_elements = size(nodes)
- shifted_data = nodes.-point
- distances = zeros(n_nodes, n_nodes, n_nodes, n_elements)
-
- # Iterate over every entry.
- for element in 1:n_elements
- for x in 1:n_nodes
- for y in 1:n_nodes
- for z in 1:n_nodes
- distances[x,y,z,element] = norm(shifted_data[:,x,y,z,element])
+ _, n_nodes, _, _, n_elements = size(nodes)
+ shifted_data = nodes .- point
+ distances = zeros(n_nodes, n_nodes, n_nodes, n_elements)
+
+ # Iterate over every entry.
+ for element in 1:n_elements
+ for x in 1:n_nodes
+ for y in 1:n_nodes
+ for z in 1:n_nodes
+ distances[x, y, z, element] = norm(shifted_data[:, x, y, z,
+ element])
+ end
+ end
end
- end
end
- end
- return distances
+ return distances
end
# Interpolate the data on given nodes to a single value at given point.
function get_value_at_point(point, nodes, data)
- # Set up ata structures.
- n_variables, n_x_nodes, n_y_nodes, n_z_nodes, _ = size(data)
- distances = distances_from_single_point(nodes, point)
- maximum_distance = maximum(distances)
+ # Set up data structures.
+ n_variables, n_x_nodes, n_y_nodes, n_z_nodes, _ = size(data)
+ distances = distances_from_single_point(nodes, point)
+ maximum_distance = maximum(distances)
+
+ coordinates_tetrahedron = Array{Float64, 2}(undef, 3, 4)
+ value_tetrahedron = Array{Float64}(undef, n_variables, 4)
+
+ index = argmin(distances)
- coordinates_tetrahedron = Array{Float64, 2}(undef, 3, 4)
- value_tetrahedron = Array{Float64}(undef, n_variables, 4)
+ # If the point sits exactly on a node, no interpolation is needed.
+ if nodes[:, index[1], index[2], index[3], index[4]] == point
+ return data[1, index[1], index[2], index[3], index[4]]
+ end
- index = argmin(distances)
+ @views coordinates_tetrahedron[:, 1] = nodes[:, index[1], index[2], index[3],
+ index[4]]
+ @views value_tetrahedron[:, 1] = data[:, index[1], index[2], index[3], index[4]]
+
+ # Restrict the interpolation to the closest element only.
+ closest_element = index[4]
+ @views element_distances = distances[:, :, :, closest_element]
+
+ # Find a tetrahedron, which is given by four corners, to interpolate from.
+ for i in 1:4
+ # Iterate until a valid tetrahedron is found.
+ while true
+ index = argmin(element_distances)
+ element_distances[index[1], index[2], index[3]] = maximum_distance
+
+ @views coordinates_tetrahedron[:, i] = nodes[:, index[1], index[2],
+ index[3], closest_element]
+ @views value_tetrahedron[:, i] = data[:, index[1], index[2], index[3],
+ closest_element]
+
+ # Look for another point if the current tetrahedron is not valid.
+ if is_valid_tetrahedron(i, coordinates_tetrahedron)
+ break
+ end
+ end
+ end
- # If the point sits exactly on a node, no interpolation is needed.
- if nodes[:, index[1], index[2], index[3], index[4]] == point
- return data[1, index[1], index[2], index[3], index[4]]
- end
+ # Interpolate from tetrahedron to given point.
+ value_at_point = Array{Float64}(undef, n_variables)
+ for v in 1:n_variables
+ value_at_point[v] = tetrahedron_interpolation(coordinates_tetrahedron[1, :],
+ coordinates_tetrahedron[2, :],
+ coordinates_tetrahedron[3, :],
+ value_tetrahedron[v, :], point)
+ end
- @views coordinates_tetrahedron[:,1] = nodes[:, index[1], index[2], index[3], index[4]]
- @views value_tetrahedron[:, 1] = data[:, index[1], index[2], index[3], index[4]]
+ return value_at_point
+end
- # Restrict the interpolation to the closest element only.
- closest_element = index[4]
- @views element_distances = distances[:,:,:,closest_element]
+# Convert 3d unstructured data to 1d slice and interpolate them.
+function unstructured_3d_to_1d(original_nodes, unstructured_data, nvisnodes, slice,
+ point)
+ if slice === :x
+ slice_dimension = 1
+ other_dimensions = [2, 3]
+ elseif slice === :y
+ slice_dimension = 2
+ other_dimensions = [1, 3]
+ elseif slice === :z
+ slice_dimension = 3
+ other_dimensions = [1, 2]
+ else
+ error("illegal dimension '$slice', supported dimensions are :x, :y and :z")
+ end
- # Find a tetrahedron, which is given by four corners, to interpolate from.
- for i in 1:4
- # Iterate until a valid tetrahedron is found.
- while true
- index = argmin(element_distances)
- element_distances[index[1], index[2], index[3]] = maximum_distance
+ # Set up data structures to store new 1D data.
+ @views new_unstructured_data = similar(unstructured_data[1, 1, ..])
+ @views temp_unstructured_data = similar(unstructured_data[1, ..])
+ @views new_nodes = similar(original_nodes[1, 1, 1, ..])
- @views coordinates_tetrahedron[:,i] = nodes[:, index[1], index[2], index[3], closest_element]
- @views value_tetrahedron[:, i] = data[:, index[1], index[2], index[3], closest_element]
+ n_nodes_in, _, _, n_elements, n_variables = size(unstructured_data)
+ nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in)
- # Look for another point if current tetrahedron is not valid.
- if is_valid_tetrahedron(i, coordinates_tetrahedron)
- break
- end
+ # Test if point lies in the domain.
+ lower_limit = original_nodes[1, 1, 1, 1, 1]
+ upper_limit = original_nodes[1, n_nodes_in, n_nodes_in, n_nodes_in, n_elements]
+
+ @assert length(point)>=3 "Point must be three-dimensional."
+ if prod(point[other_dimensions] .< lower_limit) ||
+ prod(point[other_dimensions] .> upper_limit)
+ error(string("Slice axis is outside of the domain. ",
+ " point[$other_dimensions]=$(point[other_dimensions]) must be between $lower_limit and $upper_limit"))
end
- end
- # Interpolate from tetrahedron to given point.
- value_at_point = Array{Float64}(undef, n_variables)
- for v in 1:n_variables
- value_at_point[v] = tetrahedron_interpolation(coordinates_tetrahedron[1, :], coordinates_tetrahedron[2, :], coordinates_tetrahedron[3, :], value_tetrahedron[v, :], point)
- end
+ # Count the number of new elements.
+ new_id = 0
- return value_at_point
-end
+ # Permute dimensions so that the slice dimensions are always in the correct places for later use.
+ if slice === :x
+ original_nodes = permutedims(original_nodes, [1, 3, 4, 2, 5])
+ unstructured_data = permutedims(unstructured_data, [2, 3, 1, 4, 5])
+ elseif slice === :y
+ original_nodes = permutedims(original_nodes, [1, 2, 4, 3, 5])
+ unstructured_data = permutedims(unstructured_data, [1, 3, 2, 4, 5])
+ end
-# Convert 3d unstructured data to 1d slice and interpolate them.
-function unstructured_3d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, point)
-
- if slice === :x
- slice_dimension = 1
- other_dimensions = [2,3]
- elseif slice === :y
- slice_dimension = 2
- other_dimensions = [1,3]
- elseif slice === :z
- slice_dimension = 3
- other_dimensions = [1,2]
- else
- error("illegal dimension '$slice', supported dimensions are :x, :y and :z")
- end
-
- # Set up data structures to store new 1D data.
- @views new_unstructured_data = similar(unstructured_data[1, 1, ..])
- @views temp_unstructured_data = similar(unstructured_data[1, ..])
- @views new_nodes = similar(original_nodes[1, 1, 1,..])
-
- n_nodes_in, _, _, n_elements, n_variables = size(unstructured_data)
- nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in)
-
- # Test if point lies in the domain.
- lower_limit = original_nodes[1, 1, 1, 1, 1]
- upper_limit = original_nodes[1, n_nodes_in, n_nodes_in, n_nodes_in, n_elements]
-
- @assert length(point) >= 3 "Point must be three-dimensional."
- if prod(point[other_dimensions] .< lower_limit) || prod(point[other_dimensions] .> upper_limit)
- error(string("Slice axis is outside of domain. ",
- " point[$other_dimensions]=$(point[other_dimensions]) must be between $lower_limit and $upper_limit"))
- end
-
- # Count the amount of new elements.
- new_id = 0
-
- # Permute dimensions so that the slice dimensions are always the in correct places 
- if slice === :x - original_nodes = permutedims(original_nodes, [1, 3, 4, 2, 5]) - unstructured_data = permutedims(unstructured_data, [2, 3, 1, 4, 5]) - elseif slice === :y - original_nodes = permutedims(original_nodes, [1, 2, 4, 3, 5]) - unstructured_data = permutedims(unstructured_data, [1, 3, 2, 4, 5]) - end - - # Iterate over all elements to find the ones that lie on the slice axis. - for element_id in 1:n_elements - min_coordinate = original_nodes[:, 1, 1, 1, element_id] - max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, n_nodes_in, element_id] - element_length = max_coordinate - min_coordinate - - # Test if the element is on the slice axis. If not just continue with the next element. - if !((prod(min_coordinate[other_dimensions] .<= point[other_dimensions]) && - prod(max_coordinate[other_dimensions] .> point[other_dimensions])) || - (point[other_dimensions] == upper_limit && prod(max_coordinate[other_dimensions] .== upper_limit))) - - continue - end - - new_id += 1 - - # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. - normalized_intercept = - (point[other_dimensions] .- min_coordinate[other_dimensions]) / - element_length[1] * 2 .- 1 - vandermonde_i = polynomial_interpolation_matrix(nodes_in, normalized_intercept[1]) - vandermonde_ii = polynomial_interpolation_matrix(nodes_in, normalized_intercept[2]) - - # Interpolate to each node of new 1D element. - for v in 1:n_variables - for i in 1:n_nodes_in - for ii in 1:n_nodes_in - temp_unstructured_data[i, ii, new_id, v] = (vandermonde_ii*unstructured_data[ii, :, i, element_id, v])[1] + # Iterate over all elements to find the ones that lie on the slice axis. + for element_id in 1:n_elements + min_coordinate = original_nodes[:, 1, 1, 1, element_id] + max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, n_nodes_in, + element_id] + element_length = max_coordinate - min_coordinate + + # Test if the element is on the slice axis. If not just continue with the next element. + if !((prod(min_coordinate[other_dimensions] .<= point[other_dimensions]) && + prod(max_coordinate[other_dimensions] .> point[other_dimensions])) || + (point[other_dimensions] == upper_limit && + prod(max_coordinate[other_dimensions] .== upper_limit))) + continue end - new_unstructured_data[i, new_id, v] = (vandermonde_i*temp_unstructured_data[i, :, new_id, v])[1] - end - end - new_nodes[:, new_id] = original_nodes[slice_dimension, 1, 1, :, element_id] - end + new_id += 1 + + # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. + normalized_intercept = (point[other_dimensions] .- + min_coordinate[other_dimensions]) / + element_length[1] * 2 .- 1 + vandermonde_i = polynomial_interpolation_matrix(nodes_in, + normalized_intercept[1]) + vandermonde_ii = polynomial_interpolation_matrix(nodes_in, + normalized_intercept[2]) + + # Interpolate to each node of new 1D element. 
+ for v in 1:n_variables + for i in 1:n_nodes_in + for ii in 1:n_nodes_in + temp_unstructured_data[i, ii, new_id, v] = (vandermonde_ii * unstructured_data[ii, + :, + i, + element_id, + v])[1] + end + new_unstructured_data[i, new_id, v] = (vandermonde_i * temp_unstructured_data[i, + :, + new_id, + v])[1] + end + end + + new_nodes[:, new_id] = original_nodes[slice_dimension, 1, 1, :, element_id] + end - return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), new_unstructured_data[:, 1:new_id, :], nvisnodes) + return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), + new_unstructured_data[:, 1:new_id, :], nvisnodes) end # Interpolate unstructured DG data to structured data (cell-centered) @@ -1082,264 +1174,270 @@ end # thus be changed in future releases. function unstructured2structured(unstructured_data, normalized_coordinates, levels, resolution, nvisnodes_per_level) - # Extract data shape information - n_nodes_in, _, n_elements, n_variables = size(unstructured_data) - - # Get node coordinates for DG locations on reference element - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) - - # Calculate interpolation vandermonde matrices for each level - max_level = length(nvisnodes_per_level) - 1 - vandermonde_per_level = [] - for l in 0:max_level - n_nodes_out = nvisnodes_per_level[l + 1] - dx = 2 / n_nodes_out - nodes_out = collect(range(-1 + dx/2, 1 - dx/2, length=n_nodes_out)) - push!(vandermonde_per_level, polynomial_interpolation_matrix(nodes_in, nodes_out)) - end - - # For each element, calculate index position at which to insert data in global data structure - lower_left_index = element2index(normalized_coordinates, levels, resolution, nvisnodes_per_level) - - # Create output data structure - structured = [Matrix{Float64}(undef, resolution, resolution) for _ in 1:n_variables] + # Extract data shape information + n_nodes_in, _, n_elements, n_variables = size(unstructured_data) + + # Get node coordinates for DG locations on reference element + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) + + # Calculate interpolation vandermonde matrices for each level + max_level = length(nvisnodes_per_level) - 1 + vandermonde_per_level = [] + for l in 0:max_level + n_nodes_out = nvisnodes_per_level[l + 1] + dx = 2 / n_nodes_out + nodes_out = collect(range(-1 + dx / 2, 1 - dx / 2, length = n_nodes_out)) + push!(vandermonde_per_level, + polynomial_interpolation_matrix(nodes_in, nodes_out)) + end - # For each variable, interpolate element data and store to global data structure - for v in 1:n_variables - # Reshape data array for use in multiply_dimensionwise function - reshaped_data = reshape(unstructured_data[:, :, :, v], 1, n_nodes_in, n_nodes_in, n_elements) + # For each element, calculate index position at which to insert data in global data structure + lower_left_index = element2index(normalized_coordinates, levels, resolution, + nvisnodes_per_level) - for element_id in 1:n_elements - # Extract level for convenience - level = levels[element_id] + # Create output data structure + structured = [Matrix{Float64}(undef, resolution, resolution) for _ in 1:n_variables] - # Determine target indices - n_nodes_out = nvisnodes_per_level[level + 1] - first = lower_left_index[:, element_id] - last = first .+ (n_nodes_out - 1) - - # Interpolate data - vandermonde = vandermonde_per_level[level + 1] - structured[v][first[1]:last[1], first[2]:last[2]] .= ( - reshape(multiply_dimensionwise(vandermonde, reshaped_data[:, :, :, element_id]), - n_nodes_out, n_nodes_out)) + 
# For each variable, interpolate element data and store to global data structure + for v in 1:n_variables + # Reshape data array for use in multiply_dimensionwise function + reshaped_data = reshape(unstructured_data[:, :, :, v], 1, n_nodes_in, + n_nodes_in, n_elements) + + for element_id in 1:n_elements + # Extract level for convenience + level = levels[element_id] + + # Determine target indices + n_nodes_out = nvisnodes_per_level[level + 1] + first = lower_left_index[:, element_id] + last = first .+ (n_nodes_out - 1) + + # Interpolate data + vandermonde = vandermonde_per_level[level + 1] + structured[v][first[1]:last[1], first[2]:last[2]] .= (reshape(multiply_dimensionwise(vandermonde, + reshaped_data[:, + :, + :, + element_id]), + n_nodes_out, + n_nodes_out)) + end end - end - return structured + return structured end - # For a given normalized element coordinate, return the index of its lower left # contribution to the global data structure # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function element2index(normalized_coordinates, levels, resolution, nvisnodes_per_level) - @assert size(normalized_coordinates, 1) == 2 "only works in 2D" - - n_elements = length(levels) - - # First, determine lower left coordinate for all cells - dx = 2 / resolution - ndim = 2 - lower_left_coordinate = Array{Float64}(undef, ndim, n_elements) - for element_id in 1:n_elements - nvisnodes = nvisnodes_per_level[levels[element_id] + 1] - lower_left_coordinate[1, element_id] = ( - normalized_coordinates[1, element_id] - (nvisnodes - 1)/2 * dx) - lower_left_coordinate[2, element_id] = ( - normalized_coordinates[2, element_id] - (nvisnodes - 1)/2 * dx) - end - - # Then, convert coordinate to global index - indices = coordinate2index(lower_left_coordinate, resolution) - - return indices -end + @assert size(normalized_coordinates, 1)==2 "only works in 2D" + n_elements = length(levels) + + # First, determine lower left coordinate for all cells + dx = 2 / resolution + ndim = 2 + lower_left_coordinate = Array{Float64}(undef, ndim, n_elements) + for element_id in 1:n_elements + nvisnodes = nvisnodes_per_level[levels[element_id] + 1] + lower_left_coordinate[1, element_id] = (normalized_coordinates[1, element_id] - + (nvisnodes - 1) / 2 * dx) + lower_left_coordinate[2, element_id] = (normalized_coordinates[2, element_id] - + (nvisnodes - 1) / 2 * dx) + end + + # Then, convert coordinate to global index + indices = coordinate2index(lower_left_coordinate, resolution) + + return indices +end # Find 2D array index for a 2-tuple of normalized, cell-centered coordinates (i.e., in [-1,1]) # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. 
function coordinate2index(coordinate, resolution::Integer) - # Calculate 1D normalized coordinates - dx = 2/resolution - mesh_coordinates = collect(range(-1 + dx/2, 1 - dx/2, length=resolution)) - - # Find index - id_x = searchsortedfirst.(Ref(mesh_coordinates), coordinate[1, :], lt=(x,y)->x .< y .- dx/2) - id_y = searchsortedfirst.(Ref(mesh_coordinates), coordinate[2, :], lt=(x,y)->x .< y .- dx/2) - return transpose(hcat(id_x, id_y)) + # Calculate 1D normalized coordinates + dx = 2 / resolution + mesh_coordinates = collect(range(-1 + dx / 2, 1 - dx / 2, length = resolution)) + + # Find index + id_x = searchsortedfirst.(Ref(mesh_coordinates), coordinate[1, :], + lt = (x, y) -> x .< y .- dx / 2) + id_y = searchsortedfirst.(Ref(mesh_coordinates), coordinate[2, :], + lt = (x, y) -> x .< y .- dx / 2) + return transpose(hcat(id_x, id_y)) end - # Calculate the vertices for each mesh cell such that it can be visualized as a closed box # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function calc_vertices(coordinates, levels, length_level_0) - ndim = size(coordinates, 1) - @assert ndim == 2 "only works in 2D" - - # Initialize output arrays - n_elements = length(levels) - n_points_per_element = 2^ndim+2 - x = Vector{Float64}(undef, n_points_per_element*n_elements) - y = Vector{Float64}(undef, n_points_per_element*n_elements) - - # Calculate vertices for all coordinates at once - for element_id in 1:n_elements - length = length_level_0 / 2^levels[element_id] - index = n_points_per_element*(element_id-1) - x[index+1] = coordinates[1, element_id] - 1/2 * length - x[index+2] = coordinates[1, element_id] + 1/2 * length - x[index+3] = coordinates[1, element_id] + 1/2 * length - x[index+4] = coordinates[1, element_id] - 1/2 * length - x[index+5] = coordinates[1, element_id] - 1/2 * length - x[index+6] = NaN - - y[index+1] = coordinates[2, element_id] - 1/2 * length - y[index+2] = coordinates[2, element_id] - 1/2 * length - y[index+3] = coordinates[2, element_id] + 1/2 * length - y[index+4] = coordinates[2, element_id] + 1/2 * length - y[index+5] = coordinates[2, element_id] - 1/2 * length - y[index+6] = NaN - end - - return x, y -end + ndim = size(coordinates, 1) + @assert ndim==2 "only works in 2D" + # Initialize output arrays + n_elements = length(levels) + n_points_per_element = 2^ndim + 2 + x = Vector{Float64}(undef, n_points_per_element * n_elements) + y = Vector{Float64}(undef, n_points_per_element * n_elements) + + # Calculate vertices for all coordinates at once + for element_id in 1:n_elements + length = length_level_0 / 2^levels[element_id] + index = n_points_per_element * (element_id - 1) + x[index + 1] = coordinates[1, element_id] - 1 / 2 * length + x[index + 2] = coordinates[1, element_id] + 1 / 2 * length + x[index + 3] = coordinates[1, element_id] + 1 / 2 * length + x[index + 4] = coordinates[1, element_id] - 1 / 2 * length + x[index + 5] = coordinates[1, element_id] - 1 / 2 * length + x[index + 6] = NaN + + y[index + 1] = coordinates[2, element_id] - 1 / 2 * length + y[index + 2] = coordinates[2, element_id] - 1 / 2 * length + y[index + 3] = coordinates[2, element_id] + 1 / 2 * length + y[index + 4] = coordinates[2, element_id] + 1 / 2 * length + y[index + 5] = coordinates[2, element_id] - 1 / 2 * length + y[index + 6] = NaN + end + + return x, y +end # Calculate the vertices to plot each grid line for StructuredMesh # # Note: This is a low-level function that is not considered as part of Trixi.jl's 
interface and may # thus be changed in future releases. function calc_vertices(node_coordinates, mesh) - @unpack cells_per_dimension = mesh - @assert size(node_coordinates, 1) == 2 "only works in 2D" - - linear_indices = LinearIndices(size(mesh)) - - # Initialize output arrays - n_lines = sum(cells_per_dimension) + 2 - max_length = maximum(cells_per_dimension) - n_nodes = size(node_coordinates, 2) - - # Create output as two matrices `x` and `y`, each holding the node locations for each of the `n_lines` grid lines - # The # of rows in the matrices must be sufficient to store the longest dimension (`max_length`), - # and for each the node locations without doubling the corner nodes (`n_nodes-1`), plus the final node (`+1`) - # Rely on Plots.jl to ignore `NaN`s (i.e., they are not plotted) to handle shorter lines - x = fill(NaN, max_length*(n_nodes-1)+1, n_lines) - y = fill(NaN, max_length*(n_nodes-1)+1, n_lines) - - line_index = 1 - # Lines in x-direction - # Bottom boundary - i = 1 - for cell_x in axes(mesh, 1) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, node, 1, linear_indices[cell_x, 1]] - y[i, line_index] = node_coordinates[2, node, 1, linear_indices[cell_x, 1]] - - i += 1 - end - end - # Last point on bottom boundary - x[i, line_index] = node_coordinates[1, end, 1, linear_indices[end, 1]] - y[i, line_index] = node_coordinates[2, end, 1, linear_indices[end, 1]] - - # Other lines in x-direction - line_index += 1 - for cell_y in axes(mesh, 2) + @unpack cells_per_dimension = mesh + @assert size(node_coordinates, 1)==2 "only works in 2D" + + linear_indices = LinearIndices(size(mesh)) + + # Initialize output arrays + n_lines = sum(cells_per_dimension) + 2 + max_length = maximum(cells_per_dimension) + n_nodes = size(node_coordinates, 2) + + # Create output as two matrices `x` and `y`, each holding the node locations for each of the `n_lines` grid lines + # The # of rows in the matrices must be sufficient to store the longest dimension (`max_length`), + # and for each the node locations without doubling the corner nodes (`n_nodes-1`), plus the final node (`+1`) + # Rely on Plots.jl to ignore `NaN`s (i.e., they are not plotted) to handle shorter lines + x = fill(NaN, max_length * (n_nodes - 1) + 1, n_lines) + y = fill(NaN, max_length * (n_nodes - 1) + 1, n_lines) + + line_index = 1 + # Lines in x-direction + # Bottom boundary i = 1 for cell_x in axes(mesh, 1) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, node, end, linear_indices[cell_x, cell_y]] - y[i, line_index] = node_coordinates[2, node, end, linear_indices[cell_x, cell_y]] + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, node, 1, linear_indices[cell_x, 1]] + y[i, line_index] = node_coordinates[2, node, 1, linear_indices[cell_x, 1]] - i += 1 - end + i += 1 + end end - # Last point on line - x[i, line_index] = node_coordinates[1, end, end, linear_indices[end, cell_y]] - y[i, line_index] = node_coordinates[2, end, end, linear_indices[end, cell_y]] + # Last point on bottom boundary + x[i, line_index] = node_coordinates[1, end, 1, linear_indices[end, 1]] + y[i, line_index] = node_coordinates[2, end, 1, linear_indices[end, 1]] + # Other lines in x-direction line_index += 1 - end - - - # Lines in y-direction - # Left boundary - i = 1 - for cell_y in axes(mesh, 2) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, 1, node, linear_indices[1, cell_y]] - y[i, line_index] = node_coordinates[2, 1, node, linear_indices[1, cell_y]] + for cell_y in axes(mesh, 2) + i = 
1 + for cell_x in axes(mesh, 1) + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, node, end, + linear_indices[cell_x, cell_y]] + y[i, line_index] = node_coordinates[2, node, end, + linear_indices[cell_x, cell_y]] + + i += 1 + end + end + # Last point on line + x[i, line_index] = node_coordinates[1, end, end, linear_indices[end, cell_y]] + y[i, line_index] = node_coordinates[2, end, end, linear_indices[end, cell_y]] - i += 1 + line_index += 1 end - end - # Last point on left boundary - x[i, line_index] = node_coordinates[1, 1, end, linear_indices[1, end]] - y[i, line_index] = node_coordinates[2, 1, end, linear_indices[1, end]] - # Other lines in y-direction - line_index +=1 - for cell_x in axes(mesh, 1) + # Lines in y-direction + # Left boundary i = 1 for cell_y in axes(mesh, 2) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, end, node, linear_indices[cell_x, cell_y]] - y[i, line_index] = node_coordinates[2, end, node, linear_indices[cell_x, cell_y]] + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, 1, node, linear_indices[1, cell_y]] + y[i, line_index] = node_coordinates[2, 1, node, linear_indices[1, cell_y]] - i += 1 - end + i += 1 + end end - # Last point on line - x[i, line_index] = node_coordinates[1, end, end, linear_indices[cell_x, end]] - y[i, line_index] = node_coordinates[2, end, end, linear_indices[cell_x, end]] + # Last point on left boundary + x[i, line_index] = node_coordinates[1, 1, end, linear_indices[1, end]] + y[i, line_index] = node_coordinates[2, 1, end, linear_indices[1, end]] + # Other lines in y-direction line_index += 1 - end + for cell_x in axes(mesh, 1) + i = 1 + for cell_y in axes(mesh, 2) + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, end, node, + linear_indices[cell_x, cell_y]] + y[i, line_index] = node_coordinates[2, end, node, + linear_indices[cell_x, cell_y]] + + i += 1 + end + end + # Last point on line + x[i, line_index] = node_coordinates[1, end, end, linear_indices[cell_x, end]] + y[i, line_index] = node_coordinates[2, end, end, linear_indices[cell_x, end]] - return x, y + line_index += 1 + end + + return x, y end # Convert `slice` to orientations (1 -> `x`, 2 -> `y`, 3 -> `z`) for the two axes in a 2D plot function _get_orientations(mesh, slice) - if ndims(mesh) == 2 || (ndims(mesh) == 3 && slice === :xy) - orientation_x = 1 - orientation_y = 2 - elseif ndims(mesh) == 3 && slice === :xz - orientation_x = 1 - orientation_y = 3 - elseif ndims(mesh) == 3 && slice === :yz - orientation_x = 2 - orientation_y = 3 - else - orientation_x = 0 - orientation_y = 0 - end - return orientation_x, orientation_y + if ndims(mesh) == 2 || (ndims(mesh) == 3 && slice === :xy) + orientation_x = 1 + orientation_y = 2 + elseif ndims(mesh) == 3 && slice === :xz + orientation_x = 1 + orientation_y = 3 + elseif ndims(mesh) == 3 && slice === :yz + orientation_x = 2 + orientation_y = 3 + else + orientation_x = 0 + orientation_y = 0 + end + return orientation_x, orientation_y end - # Convert `orientation` into a guide label (see also `_get_orientations`) function _get_guide(orientation::Integer) - if orientation == 1 - return "\$x\$" - elseif orientation == 2 - return "\$y\$" - elseif orientation == 3 - return "\$z\$" - else - return "" - end + if orientation == 1 + return "\$x\$" + elseif orientation == 2 + return "\$y\$" + elseif orientation == 3 + return "\$z\$" + else + return "" + end end - # plotting_interpolation_matrix(dg; kwargs...) 
#
# Interpolation matrix which maps discretization nodes to a set of plotting nodes.
@@ -1356,121 +1454,125 @@ end
# to define a multi-dimensional interpolation matrix later.
plotting_interpolation_matrix(dg; kwargs...) = I(length(dg.basis.nodes))
-function face_plotting_interpolation_matrix(dg::DGSEM; nvisnodes=2*length(dg.basis.nodes))
- return polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
+function face_plotting_interpolation_matrix(dg::DGSEM;
+ nvisnodes = 2 * length(dg.basis.nodes))
+ return polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
end
-function plotting_interpolation_matrix(dg::DGSEM; nvisnodes=2*length(dg.basis.nodes))
- Vp1D = polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
- # For quadrilateral elements, interpolation to plotting nodes involves applying a 1D interpolation
- # operator to each line of nodes. This is equivalent to multiplying the vector containing all node
- # node coordinates on an element by a Kronecker product of the 1D interpolation operator (e.g., a
- # multi-dimensional interpolation operator).
- return kron(Vp1D, Vp1D)
+function plotting_interpolation_matrix(dg::DGSEM;
+ nvisnodes = 2 * length(dg.basis.nodes))
+ Vp1D = polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
+ # For quadrilateral elements, interpolation to plotting nodes involves applying a 1D interpolation
+ # operator to each line of nodes. This is equivalent to multiplying the vector containing all
+ # node coordinates on an element by a Kronecker product of the 1D interpolation operator (e.g., a
+ # multi-dimensional interpolation operator).
+ return kron(Vp1D, Vp1D)
end
function reference_node_coordinates_2d(dg::DGSEM)
- @unpack nodes = dg.basis
- r = vec([nodes[i] for i in eachnode(dg), j in eachnode(dg)])
- s = vec([nodes[j] for i in eachnode(dg), j in eachnode(dg)])
- return r, s
+ @unpack nodes = dg.basis
+ r = vec([nodes[i] for i in eachnode(dg), j in eachnode(dg)])
+ s = vec([nodes[j] for i in eachnode(dg), j in eachnode(dg)])
+ return r, s
end
-
-
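The Kronecker-product comment above is easy to verify concretely: applying `kron(Vp1D, Vp1D)` to the vector of nodal values interpolates in both coordinate directions at once. A sketch with a hand-built 1D linear interpolation matrix standing in for the internal `polynomial_interpolation_matrix` (the 2-node element and test function are illustrative only):

```julia
using LinearAlgebra

nodes = [-1.0, 1.0]
visnodes = range(-1, 1, length = 3)
Vp1D = hcat((1 .- visnodes) ./ 2, (1 .+ visnodes) ./ 2)  # 3x2 linear interpolation

f(x, y) = 1 + x + y + x * y                              # bilinear test function
U = [f(xi, yj) for xi in nodes, yj in nodes]             # 2x2 nodal values
V = reshape(kron(Vp1D, Vp1D) * vec(U), 3, 3)             # interpolate to 3x3 grid
@assert all(isapprox(V[a, b], f(visnodes[a], visnodes[b])) for a in 1:3, b in 1:3)
```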
# Find element and triangle ids containing coordinates given as a matrix [ndims, npoints]
function get_ids_by_coordinates!(ids, coordinates, pd)
- if length(ids) != 2 * size(coordinates, 2)
- throw(DimensionMismatch("storage length for element ids does not match the number of coordinates"))
- end
+ if length(ids) != 2 * size(coordinates, 2)
+ throw(DimensionMismatch("storage length for element ids does not match the number of coordinates"))
+ end
- n_coordinates = size(coordinates, 2)
+ n_coordinates = size(coordinates, 2)
- for index in 1:n_coordinates
- ids[index, :] .= find_element(coordinates[:, index], pd)
- end
+ for index in 1:n_coordinates
+ ids[index, :] .= find_element(coordinates[:, index], pd)
+ end
- return ids
+ return ids
end
# Find the ids of elements and triangles containing given coordinates by using the triangulation in 'pd'.
function get_ids_by_coordinates(coordinates, pd)
- ids = Matrix(undef, size(coordinates, 2), 2)
- get_ids_by_coordinates!(ids, coordinates, pd)
- return ids
+ ids = Matrix(undef, size(coordinates, 2), 2)
+ get_ids_by_coordinates!(ids, coordinates, pd)
+ return ids
end
# Check if given 'point' is inside the triangle with corners corresponding to the coordinates of x and y.
function is_in_triangle(point, x, y)
- a = SVector(x[1], y[1]); b = SVector(x[2], y[2]); c = SVector(x[3], y[3])
- return is_on_same_side(point, a, b, c) && is_on_same_side(point, b, c, a) && is_on_same_side(point, c, a, b)
+ a = SVector(x[1], y[1])
+ b = SVector(x[2], y[2])
+ c = SVector(x[3], y[3])
+ return is_on_same_side(point, a, b, c) && is_on_same_side(point, b, c, a) &&
+ is_on_same_side(point, c, a, b)
end
# Create an axis through x and y to then check if 'point' is on the same side of the axis as z.
function is_on_same_side(point, x, y, z)
- if (y[1] - x[1]) == 0
- return (point[1] - x[1]) * (z[1] - x[1]) >= 0
- else
- a = (y[2] - x[2]) / (y[1] - x[1])
- b = x[2] - a * x[1]
- return (z[2] - a * z[1] - b) * (point[2] - a * point[1] - b) >= 0
- end
+ if (y[1] - x[1]) == 0
+ return (point[1] - x[1]) * (z[1] - x[1]) >= 0
+ else
+ a = (y[2] - x[2]) / (y[1] - x[1])
+ b = x[2] - a * x[1]
+ return (z[2] - a * z[1] - b) * (point[2] - a * point[1] - b) >= 0
+ end
end
# For a given 'point', return the id of the element it is contained in; if it is not found, return 0.
function find_element(point, pd)
- n_tri = size(pd.t, 1)
- n_elements = size(pd.x, 2)
-
- # Iterate over all elements.
- for element in 1:n_elements
- # Iterate over all triangles in given element.
- for tri in 1:n_tri
- if is_in_triangle(point, pd.x[pd.t[tri, :], element], pd.y[pd.t[tri, :], element])
- return SVector(element, tri)
- end
- end
- end
+ n_tri = size(pd.t, 1)
+ n_elements = size(pd.x, 2)
+
+ # Iterate over all elements.
+ for element in 1:n_elements
+ # Iterate over all triangles in given element.
+ for tri in 1:n_tri
+ if is_in_triangle(point, pd.x[pd.t[tri, :], element],
+ pd.y[pd.t[tri, :], element])
+ return SVector(element, tri)
+ end
+ end
+ end
end
# Interpolate from three corners of a triangle to a single point.
-function triangle_interpolation(x_coordinates_in, y_coordinates_in, values_in, coordinate_out)
- A = hcat(x_coordinates_in, y_coordinates_in, SVector(1, 1, 1))
- c = A \ values_in
- return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] + c[3]
+function triangle_interpolation(x_coordinates_in, y_coordinates_in, values_in,
+ coordinate_out)
+ A = hcat(x_coordinates_in, y_coordinates_in, SVector(1, 1, 1))
+ c = A \ values_in
+ return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] + c[3]
end
# Create an axis.
function axis_curve(nodes_x, nodes_y, nodes_z, slice, point, n_points)
- if n_points == nothing
- n_points = 64
- end
- dimensions = length(point)
- curve = zeros(dimensions, n_points)
- if slice == :x
- xmin, xmax = extrema(nodes_x)
- curve[1, :] .= range(xmin, xmax, length = n_points)
- curve[2, :] .= point[2]
- if dimensions === 3
- curve[3, :] .= point[3]
- end
- elseif slice == :y
- ymin, ymax = extrema(nodes_y)
- curve[1, :] .= point[1]
- curve[2, :] .= range(ymin, ymax, length = n_points)
- if dimensions === 3
- curve[3, :] .= point[3]
- end
- elseif slice == :z
- zmin, zmax = extrema(nodes_z)
- curve[1, :] .= point[1]
- curve[2, :] .= point[2]
- curve[3, :] .= range(zmin, zmax, length = n_points)
- else
- @assert false "Input for 'slice' is not supported here." 
- end - - return curve -end + if n_points == nothing + n_points = 64 + end + dimensions = length(point) + curve = zeros(dimensions, n_points) + if slice == :x + xmin, xmax = extrema(nodes_x) + curve[1, :] .= range(xmin, xmax, length = n_points) + curve[2, :] .= point[2] + if dimensions === 3 + curve[3, :] .= point[3] + end + elseif slice == :y + ymin, ymax = extrema(nodes_y) + curve[1, :] .= point[1] + curve[2, :] .= range(ymin, ymax, length = n_points) + if dimensions === 3 + curve[3, :] .= point[3] + end + elseif slice == :z + zmin, zmax = extrema(nodes_z) + curve[1, :] .= point[1] + curve[2, :] .= point[2] + curve[3, :] .= range(zmin, zmax, length = n_points) + else + @assert false "Input for 'slice' is not supported here." + end + return curve +end end # @muladd diff --git a/src/visualization/visualization.jl b/src/visualization/visualization.jl index c0091efebd5..94d2532cba3 100644 --- a/src/visualization/visualization.jl +++ b/src/visualization/visualization.jl @@ -3,9 +3,14 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent include("types.jl") include("utilities.jl") include("recipes_plots.jl") +# Add function definitions here such that they can be exported from Trixi.jl and extended in the +# TrixiMakieExt package extension or by the Makie-specific code loaded by Requires.jl +function iplot end +function iplot! end end # @muladd diff --git a/test/runtests.jl b/test/runtests.jl index e56c4d56d0c..f76811dddbf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,13 +25,14 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) cmd = string(Base.julia_cmd()) coverage = occursin("--code-coverage", cmd) && !occursin("--code-coverage=none", cmd) if !(coverage && Sys.iswindows()) && !(coverage && Sys.islinux()) + # We provide a `--heap-size-hint` to avoid/reduce out-of-memory errors during CI testing mpiexec() do cmd - run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes $(abspath("test_mpi.jl"))`) + run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes --heap-size-hint=1G $(abspath("test_mpi.jl"))`) end end end - @time if TRIXI_TEST == "all" || TRIXI_TEST == "threaded" + @time if TRIXI_TEST == "all" || TRIXI_TEST == "threaded" || TRIXI_TEST == "threaded_legacy" # Do a dummy `@test true`: # If the process errors out the testset would error out as well, # cf. 
https://github.com/JuliaParallel/MPI.jl/pull/391 diff --git a/test/test_dgmulti_2d.jl b/test/test_dgmulti_2d.jl index 302dbebc8ff..861e30045ce 100644 --- a/test/test_dgmulti_2d.jl +++ b/test/test_dgmulti_2d.jl @@ -95,8 +95,9 @@ isdir(outdir) && rm(outdir, recursive=true) @trixi_testset "elixir_euler_curved.jl (Quadrilateral elements, GaussSBP, flux differencing)" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_curved.jl"), approximation_type = GaussSBP(), - l2 = [3.4666312082010235e-6, 3.439277448411873e-6, 3.439277448308561e-6, 1.0965598425655705e-5], - linf = [1.1327280369899384e-5, 1.1343911921146699e-5, 1.1343911907157889e-5, 3.6795826181545976e-5] + l2 = [3.4666312079259457e-6, 3.4392774480368986e-6, 3.439277447953705e-6, 1.0965598424665836e-5], + linf = [1.1327280377004811e-5, 1.1343911926253725e-5, 1.1343911906935844e-5, 3.679582619220412e-5], + rtol = 2 * sqrt(eps()) ) end diff --git a/test/test_mpi.jl b/test/test_mpi.jl index 10895665f23..34febf7e268 100644 --- a/test/test_mpi.jl +++ b/test/test_mpi.jl @@ -20,8 +20,8 @@ CI_ON_WINDOWS = (get(ENV, "GITHUB_ACTIONS", false) == "true") && Sys.iswindows() include("test_mpi_tree.jl") # P4estMesh tests + include("test_mpi_p4est_2d.jl") if !CI_ON_WINDOWS # see comment on `CI_ON_WINDOWS` above - include("test_mpi_p4est_2d.jl") include("test_mpi_p4est_3d.jl") end end # MPI diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index 588f43e4543..b0ac63d4ce9 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -184,6 +184,22 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_periodic.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_advection_diffusion_periodic.jl"), + trees_per_dimension = (1, 1), initial_refinement_level = 2, tspan=(0.0, 0.5), + l2 = [0.0023754695605828443], + linf = [0.008154128363741964] + ) + end + + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_periodic_curved.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_advection_diffusion_periodic_curved.jl"), + trees_per_dimension = (1, 1), initial_refinement_level = 2, tspan=(0.0, 0.5), + l2 = [0.012380458938507371], + linf = [0.10860506906472567] + ) + end + end # Clean up afterwards: delete Trixi.jl output directory diff --git a/test/test_tree_1d_advection.jl b/test/test_tree_1d_advection.jl index d8ece1d055e..0cf0f2c1170 100644 --- a/test/test_tree_1d_advection.jl +++ b/test/test_tree_1d_advection.jl @@ -27,6 +27,12 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") linf = [3.235356127918171e-5], coverage_override = (maxiters=6,)) end + + @trixi_testset "elixir_advection_finite_volume.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_finite_volume.jl"), + l2 = [0.011662300515980219], + linf = [0.01647256923710194]) + end end end # module diff --git a/utils/build_sysimage.jl b/utils/build_sysimage.jl index ff99cc872fd..69bce54b269 100755 --- a/utils/build_sysimage.jl +++ b/utils/build_sysimage.jl @@ -45,7 +45,7 @@ start_time = time() # Create a temporary environment to install all necessary packages without modifying # the users environment -Pkg.activate(temp=true) +Pkg.activate(temp = true) # Add package compiler, Trixi.jl, and additional packages that shall be built into the sysimage Pkg.add("PackageCompiler") @@ -56,21 +56,22 @@ Pkg.add("Trixi") # of the current temporary project if we do not want to bundle Trixi.jl into the sysimage. 
 packages = Symbol[:OrdinaryDiffEq, :Plots, :Trixi2Vtk]
 if lowercase(get(ENV, "TRIXI_SYSIMAGE_INCLUDE_TRIXI", "no")) in ("yes", "1", "true")
-  # If Trixi.jl is to be included, just add it to the list
-  push!(packages, :Trixi)
+    # If Trixi.jl is to be included, just add it to the list
+    push!(packages, :Trixi)
 else
-  # Otherwise, figure out all direct dependencies and add them instead
-  # Inspired by: https://github.com/CliMA/ClimateMachine.jl/blob/8c57fb55acc20ee824ea37478395a7cb07c5a93c/.dev/systemimage/climate_machine_image.jl
-  trixi_uuid = Base.UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb")
-  append!(packages, Symbol[Symbol(v) for v in keys(Pkg.dependencies()[trixi_uuid].dependencies)])
+    # Otherwise, figure out all direct dependencies and add them instead
+    # Inspired by: https://github.com/CliMA/ClimateMachine.jl/blob/8c57fb55acc20ee824ea37478395a7cb07c5a93c/.dev/systemimage/climate_machine_image.jl
+    trixi_uuid = Base.UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb")
+    append!(packages,
+            Symbol[Symbol(v) for v in keys(Pkg.dependencies()[trixi_uuid].dependencies)])
 end

 map(Pkg.add ∘ string, packages)
 Pkg.precompile()

-
 # Collect remaining arguments
-sysimage_path = get(ENV, "TRIXI_SYSIMAGE_PATH", joinpath(@__DIR__, "TrixiSysimage." * Libdl.dlext))
+sysimage_path = get(ENV, "TRIXI_SYSIMAGE_PATH",
+                    joinpath(@__DIR__, "TrixiSysimage." * Libdl.dlext))
 precompile_execution_file = joinpath(@__DIR__, "precompile_execution_file.jl")

 # Create system image
@@ -79,12 +80,10 @@ precompile_execution_file = joinpath(@__DIR__, "precompile_execution_file.jl")
 @info "Precompile execution file: $precompile_execution_file"

 using PackageCompiler
-PackageCompiler.create_sysimage(
-  packages,
-  sysimage_path=sysimage_path,
-  precompile_execution_file=precompile_execution_file,
-  cpu_target=PackageCompiler.default_app_cpu_target()
-)
+PackageCompiler.create_sysimage(packages,
+                                sysimage_path = sysimage_path,
+                                precompile_execution_file = precompile_execution_file,
+                                cpu_target = PackageCompiler.default_app_cpu_target())

 duration = time() - start_time
 @info "Done. Created sysimage in $duration seconds."
diff --git a/utils/euler-manufactured.jl b/utils/euler-manufactured.jl
index 6b3e04d2fc6..7e19c4e64a6 100644
--- a/utils/euler-manufactured.jl
+++ b/utils/euler-manufactured.jl
@@ -21,7 +21,6 @@ julia> euler3d()
 using Reduce
 @force using Reduce.Algebra

-
 # Original Reduce code (CompressibleEulerEquations 1D)
 #=
 clear(γ,f,A,ω,c,ini,rho,rho_v1,rho_v2,rho_v3,rho_e,v1,v2,p,x,y,t,u1,u2,u3,u4);
@@ -39,24 +38,24 @@ source_rho_v1 := df(rho_v1, t) + df(rho * v1^2 + p, x)
 source_rho_e := df(rho_e, t) + df((rho_e + p) * v1, x)
 =#

-
 function euler1d()
-  quote
-    ini = c + a * sin(ω * (x - t))
-    rho = ini
-    rho_v1 = ini
-    rho_e = ini^2
-
-    v1 = rho_v1 / rho
-    p = (γ - 1) * (rho_e - 1/2 * rho * v1^2)
-
-    source_rho = df(rho, t) + df(rho_v1, x)
-    source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x)
-    source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x)
-  end |> rcall
+    quote
+        ini = c + a * sin(ω * (x - t))
+        rho = ini
+        rho_v1 = ini
+        rho_e = ini^2
+
+        v1 = rho_v1 / rho
+        p = (γ - 1) * (rho_e - 1 / 2 * rho * v1^2)
+
+        #! format: off
+        source_rho = df(rho, t) + df(rho_v1, x)
+        source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x)
+        source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x)
+        #! format: on
+    end |> rcall
 end

-
 # Original Reduce code (CompressibleEulerEquations 2D)
 #=
 clear(γ,f,A,ω,c,ini,rho,rho_v1,rho_v2,rho_v3,rho_e,v1,v2,p,x,y,t,u1,u2,u3,u4);
@@ -77,27 +76,27 @@ source_rho_v2 := df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y)
 source_rho_e := df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y);
 =#

-
 function euler2d()
-  quote
-    ini = c + a * sin(ω * (x + y - t))
-    rho = ini
-    rho_v1 = ini
-    rho_v2 = ini
-    rho_e = ini^2
-
-    v1 = rho_v1 / rho
-    v2 = rho_v2 / rho
-    p = (γ - 1) * (rho_e - 1/2 * rho * (v1^2 + v2^2))
-
-    source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y)
-    source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y)
-    source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y)
-    source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y)
-  end |> rcall
+    quote
+        ini = c + a * sin(ω * (x + y - t))
+        rho = ini
+        rho_v1 = ini
+        rho_v2 = ini
+        rho_e = ini^2
+
+        v1 = rho_v1 / rho
+        v2 = rho_v2 / rho
+        p = (γ - 1) * (rho_e - 1 / 2 * rho * (v1^2 + v2^2))
+
+        #! format: off
+        source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y)
+        source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y)
+        source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y)
+        source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y)
+        #! format: on
+    end |> rcall
 end

-
 # Original Reduce code (CompressibleEulerEquations 3D)
 #=
 clear(γ,f,A,ω,c,a1,a2,a3,ini,rho,rho_v1,rho_v2,rho_v3,rho_e,v1,v2,v3,p,x,y,z,t);
@@ -122,23 +121,25 @@ source_rho_e := df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2,
 =#

 function euler3d()
-  quote
-    ini = c + a * sin(ω * (x + y + z - t))
-    rho = ini
-    rho_v1 = ini
-    rho_v2 = ini
-    rho_v3 = ini
-    rho_e = ini^2
-
-    v1 = rho_v1 / rho
-    v2 = rho_v2 / rho
-    v3 = rho_v3 / rho
-    p = (γ - 1) * (rho_e - 1/2 * rho * (v1^2 + v2^2 + v3^2))
-
-    source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y) + df(rho_v3, z)
-    source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y) + df(rho * v1 * v3, z)
-    source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y) + df(rho * v2 * v3, z)
-    source_rho_v3 = df(rho_v3, t) + df(rho * v1 * v3, x) + df(rho * v3 * v3, y) + df(rho * v3^2 + p, z)
-    source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y) + df((rho_e + p) * v3, z)
-  end |> rcall
+    quote
+        ini = c + a * sin(ω * (x + y + z - t))
+        rho = ini
+        rho_v1 = ini
+        rho_v2 = ini
+        rho_v3 = ini
+        rho_e = ini^2
+
+        v1 = rho_v1 / rho
+        v2 = rho_v2 / rho
+        v3 = rho_v3 / rho
+        p = (γ - 1) * (rho_e - 1 / 2 * rho * (v1^2 + v2^2 + v3^2))
+
+        #! format: off
+        source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y) + df(rho_v3, z)
+        source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y) + df(rho * v1 * v3, z)
+        source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y) + df(rho * v2 * v3, z)
+        source_rho_v3 = df(rho_v3, t) + df(rho * v1 * v3, x) + df(rho * v3 * v3, y) + df(rho * v3^2 + p, z)
+        source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y) + df((rho_e + p) * v3, z)
+        #! format: on
+    end |> rcall
 end
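A quick sanity check of what these scripts compute: in the 1D case above, rho = rho_v1 = ini = c + a*sin(ω*(x - t)), so the density source term vanishes identically,

    source_rho = df(rho, t) + df(rho_v1, x)
               = -a*ω*cos(ω*(x - t)) + a*ω*cos(ω*(x - t))
               = 0

while the momentum and energy sources remain nonzero because of the pressure and the nonlinear flux terms; those expanded expressions are what the `|> rcall` pipeline asks Reduce to evaluate symbolically.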
diff --git a/utils/julia-format.jl b/utils/julia-format.jl
deleted file mode 100755
index f53b5c0ceca..00000000000
--- a/utils/julia-format.jl
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env julia
-
-using ArgParse: ArgParseSettings, @add_arg_table, parse_args
-using JuliaFormatter: format
-
-
-function main()
-  # Parse command line arguments
-  args = parse_commandline_arguments()
-
-  # Call formatter with our default options
-  format(args["path"],
-         overwrite = true,
-         verbose = true,
-         indent = 2,
-         margin = 100,
-         always_for_in = true)
-end
-
-
-function parse_commandline_arguments()
-  s = ArgParseSettings()
-  @add_arg_table s begin
-    "path"
-      help = ("Name of file or folder to format. If PATH is a folder, "
-              * "its contents are examined recursively and all `.jl` files are formatted.")
-      arg_type = String
-      required = true
-      nargs = '+'
-  end
-
-  return parse_args(s)
-end
-
-
-if abspath(PROGRAM_FILE) == @__FILE__
-  main()
-end
diff --git a/utils/precompile_execution_file.jl b/utils/precompile_execution_file.jl
index c7a56f1a67b..3117c2b1589 100644
--- a/utils/precompile_execution_file.jl
+++ b/utils/precompile_execution_file.jl
@@ -1,3 +1,4 @@
+#! format: off
 using Trixi

 trixi_include(default_example())
diff --git a/utils/trixi-format.jl b/utils/trixi-format.jl
new file mode 100755
index 00000000000..d1e7efa656a
--- /dev/null
+++ b/utils/trixi-format.jl
@@ -0,0 +1,30 @@
+#!/usr/bin/env julia
+
+using Pkg
+Pkg.activate(; temp = true, io = devnull)
+Pkg.add("JuliaFormatter"; preserve = PRESERVE_ALL, io = devnull)
+
+using JuliaFormatter: format
+
+function main()
+    # Show help
+    if "-h" in ARGS || "--help" in ARGS
+        println("usage: trixi-format.jl PATH [PATH...]")
+        println()
+        println("positional arguments:")
+        println()
+        println("  PATH  One or more paths (directories or files) to format. Default: '.'")
+        return nothing
+    end
+
+    # Set default path if none is given on command line
+    if isempty(ARGS)
+        paths = String["."]
+    else
+        paths = ARGS
+    end
+
+    return format(paths)
+end
+
+main()
diff --git a/utils/trixi2tec.jl b/utils/trixi2tec.jl
index 1c76bc27623..fc5f3e705c2 100644
--- a/utils/trixi2tec.jl
+++ b/utils/trixi2tec.jl
@@ -32,64 +32,67 @@ julia> trixi2tec(sol, "mydata_primitive.tec", solution_variables=cons2prim)

 This is an experimental feature and *not* part of the official Trixi.jl API. Specifically,
 this function may change (or even be removed) in future releases without warning.
 """
-function trixi2tec(u, semi, filename; title=basename(filename), solution_variables=cons2cons)
-  # Extract fundamental building blocks and auxiliary data
-  mesh, equations, solver, cache = Trixi.mesh_equations_solver_cache(semi)
-  @unpack node_coordinates = cache.elements
+function trixi2tec(u, semi, filename; title = basename(filename),
+                   solution_variables = cons2cons)
+    # Extract fundamental building blocks and auxiliary data
+    mesh, equations, solver, cache = Trixi.mesh_equations_solver_cache(semi)
+    @unpack node_coordinates = cache.elements

-  # Collect variable names and size information
-  ndims = Trixi.ndims(semi)
-  if ndims == 1
-    variables = ["x"]
-    ndofs_x = size(u, 2)
-    indices = CartesianIndices((ndofs_x,))
-    zone_info = "ZONE I=$ndofs_x, F=POINT\n"
-  elseif ndims == 2
-    variables = ["x", "y"]
-    ndofs_x = size(u, 2)
-    ndofs_y = size(u, 3)
-    indices = CartesianIndices((ndofs_x, ndofs_y))
-    zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, F=POINT\n"
-  elseif ndims == 3
-    variables = ["x", "y", "z"]
-    ndofs_x = size(u, 2)
-    ndofs_y = size(u, 3)
-    ndofs_z = size(u, 4)
-    indices = CartesianIndices((ndofs_x, ndofs_y, ndofs_z))
-    zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, K=$ndofs_z, F=POINT\n"
-  else
-    error("Unsupported number of dimensions (must be 1, 2, or 3)")
-  end
-  push!(variables, Trixi.varnames(solution_variables, equations)...)
-  variables_list = join(variables, "\", \"")
+    # Collect variable names and size information
+    ndims = Trixi.ndims(semi)
+    if ndims == 1
+        variables = ["x"]
+        ndofs_x = size(u, 2)
+        indices = CartesianIndices((ndofs_x,))
+        zone_info = "ZONE I=$ndofs_x, F=POINT\n"
+    elseif ndims == 2
+        variables = ["x", "y"]
+        ndofs_x = size(u, 2)
+        ndofs_y = size(u, 3)
+        indices = CartesianIndices((ndofs_x, ndofs_y))
+        zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, F=POINT\n"
+    elseif ndims == 3
+        variables = ["x", "y", "z"]
+        ndofs_x = size(u, 2)
+        ndofs_y = size(u, 3)
+        ndofs_z = size(u, 4)
+        indices = CartesianIndices((ndofs_x, ndofs_y, ndofs_z))
+        zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, K=$ndofs_z, F=POINT\n"
+    else
+        error("Unsupported number of dimensions (must be 1, 2, or 3)")
+    end
+    push!(variables, Trixi.varnames(solution_variables, equations)...)
+    variables_list = join(variables, "\", \"")

-  # Write tec file
-  open(filename, "w") do io
-    write(io, """TITLE = "$title"\n""")
-    write(io, """VARIABLES = "$variables_list"\n""")
-    for element in eachelement(solver, cache)
-      write(io, zone_info)
-      for ci in indices
-        node_coords = Trixi.get_node_coords(node_coordinates, equations, solver, ci, element)
-        node_vars = solution_variables(Trixi.get_node_vars(u, equations, solver, ci, element), equations)
-        print(io, join(node_coords, " "))
-        write(io, " ")
-        print(io, join(node_vars, " "))
-        write(io, "\n")
-      end # k, j, i
-    end # element
-  end
+    # Write tec file
+    open(filename, "w") do io
+        write(io, """TITLE = "$title"\n""")
+        write(io, """VARIABLES = "$variables_list"\n""")
+        for element in eachelement(solver, cache)
+            write(io, zone_info)
+            for ci in indices
+                node_coords = Trixi.get_node_coords(node_coordinates, equations, solver, ci,
+                                                    element)
+                node_vars = solution_variables(Trixi.get_node_vars(u, equations, solver, ci,
+                                                                   element), equations)
+                print(io, join(node_coords, " "))
+                write(io, " ")
+                print(io, join(node_vars, " "))
+                write(io, "\n")
+            end # k, j, i
+        end # element
+    end
 end

 # Convenience function to allow calling `trixi2tec` with the `sol` variable
 function trixi2tec(sol, filename; kwargs...)
-  semi = sol.prob.p
-  u_ode = sol.u[end]
-  trixi2tec(u_ode, semi, filename; kwargs...)
+    semi = sol.prob.p
+    u_ode = sol.u[end]
+    trixi2tec(u_ode, semi, filename; kwargs...)
 end

 # Convenience function to allow calling `trixi2tec` with, e.g., the initial condition
 function trixi2tec(u_ode::Vector{<:Real}, semi, filename; kwargs...)
-  u = Trixi.wrap_array_native(u_ode, semi)
-  trixi2tec(u, semi, filename; kwargs...)
+    u = Trixi.wrap_array_native(u_ode, semi)
+    trixi2tec(u, semi, filename; kwargs...)
 end
diff --git a/utils/trixi2txt.jl b/utils/trixi2txt.jl
index 1fb631c9f83..b386f150da4 100644
--- a/utils/trixi2txt.jl
+++ b/utils/trixi2txt.jl
@@ -35,323 +35,324 @@ include("../src/solvers/dgsem/basis_lobatto_legendre.jl")
 include("../src/solvers/dgsem/interpolation.jl")

 function trixi2txt(filename::AbstractString...;
-                   variables=[], output_directory=".", nvisnodes=nothing, max_supported_level=11)
-  # Convert filenames to a single list of strings
-  if isempty(filename)
-    error("no input file was provided")
-  end
-  filenames = String[]
-  for pattern in filename
-    append!(filenames, glob(pattern))
-  end
-
-  # Iterate over input files
-  for (index, filename) in enumerate(filenames)
-    # Check if data file exists
-    if !isfile(filename)
-      error("file '$filename' does not exist")
+                   variables = [], output_directory = ".", nvisnodes = nothing,
+                   max_supported_level = 11)
+    # Convert filenames to a single list of strings
+    if isempty(filename)
+        error("no input file was provided")
     end
-
-    # Make sure it is a data file
-    if !is_solution_restart_file(filename)
-      error("file '$filename' is not a data file")
+    filenames = String[]
+    for pattern in filename
+        append!(filenames, glob(pattern))
     end

-    # Get mesh file name
-    meshfile = extract_mesh_filename(filename)
+    # Iterate over input files
+    for (index, filename) in enumerate(filenames)
+        # Check if data file exists
+        if !isfile(filename)
+            error("file '$filename' does not exist")
+        end

-    # Check if mesh file exists
-    if !isfile(meshfile)
-      error("mesh file '$meshfile' does not exist")
-    end
+        # Make sure it is a data file
+        if !is_solution_restart_file(filename)
+            error("file '$filename' is not a data file")
+        end

-    # Read mesh
-    center_level_0, length_level_0, leaf_cells, coordinates, levels = read_meshfile(meshfile)
+        # Get mesh file name
+        meshfile = extract_mesh_filename(filename)

-    # Read data
-    labels, data, n_elements, n_nodes, element_variables, time = read_datafile(filename)
+        # Check if mesh file exists
+        if !isfile(meshfile)
+            error("mesh file '$meshfile' does not exist")
+        end

-    # Check if dimensions match
-    if length(leaf_cells) != n_elements
-      error("number of elements in '$(filename)' do not match number of leaf cells in " *
-            "'$(meshfile)' " *
-            "(did you forget to clean your 'out/' directory between different runs?)")
-    end
+        # Read mesh
+        center_level_0, length_level_0, leaf_cells, coordinates, levels = read_meshfile(meshfile)

-    # Determine resolution for data interpolation
-    max_level = maximum(levels)
-    if max_level > max_supported_level
-      error("Maximum refinement level in data file $max_level is higher than " *
-            "maximum supported level $max_supported_level")
-    end
-    max_available_nodes_per_finest_element = 2^(max_supported_level - max_level)
-    if nvisnodes == nothing
-      max_nvisnodes = 2 * n_nodes
-    elseif nvisnodes == 0
-      max_nvisnodes = n_nodes
-    else
-      max_nvisnodes = nvisnodes
-    end
-    nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes)
-    resolution = nvisnodes_at_max_level * 2^max_level
-    nvisnodes_per_level = [2^(max_level - level)*nvisnodes_at_max_level for level in 0:max_level]
-
-    # Interpolate data
-    structured_data = unstructured2structured(data, levels, resolution, nvisnodes_per_level)
-
-    # Interpolate cell-centered values to node-centered values
-    node_centered_data = cell2node(structured_data)
-
-    # Determine x coordinates
-    xs = collect(range(-1, 1, length=resolution+1)) .* length_level_0/2 .+ center_level_0[1]
-
-    # Check that all variables exist in data file
-    if isempty(variables)
-      append!(variables, labels)
-    else
-      for var in variables
-        if !(var in labels)
-          error("variable '$var' does not exist in the data file $filename")
+        # Read data
+        labels, data, n_elements, n_nodes, element_variables, time = read_datafile(filename)
+
+        # Check if dimensions match
+        if length(leaf_cells) != n_elements
+            error("number of elements in '$(filename)' does not match number of leaf cells in " *
+                  "'$(meshfile)' " *
+                  "(did you forget to clean your 'out/' directory between different runs?)")
+        end
+
+        # Determine resolution for data interpolation
+        max_level = maximum(levels)
+        if max_level > max_supported_level
+            error("Maximum refinement level in data file $max_level is higher than " *
+                  "maximum supported level $max_supported_level")
+        end
+        max_available_nodes_per_finest_element = 2^(max_supported_level - max_level)
+        if nvisnodes === nothing
+            max_nvisnodes = 2 * n_nodes
+        elseif nvisnodes == 0
+            max_nvisnodes = n_nodes
+        else
+            max_nvisnodes = nvisnodes
+        end
+        nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes)
+        resolution = nvisnodes_at_max_level * 2^max_level
+        nvisnodes_per_level = [2^(max_level - level) * nvisnodes_at_max_level
+                               for level in 0:max_level]
+
+        # Interpolate data
+        structured_data = unstructured2structured(data, levels, resolution,
+                                                  nvisnodes_per_level)
+
+        # Interpolate cell-centered values to node-centered values
+        node_centered_data = cell2node(structured_data)
+
+        # Determine x coordinates
+        xs = collect(range(-1, 1, length = resolution + 1)) .* length_level_0 / 2 .+
+             center_level_0[1]
+
+        # Check that all variables exist in data file
+        if isempty(variables)
+            append!(variables, labels)
+        else
+            for var in variables
+                if !(var in labels)
+                    error("variable '$var' does not exist in the data file $filename")
+                end
+            end
         end
-      end
-    end

-    # Create output directory if it does not exist
-    mkpath(output_directory)
-
-    # Determine output file name
-    base, _ = splitext(splitdir(filename)[2])
-    output_filename = joinpath(output_directory, "$(base).txt")
-
-    # Write to file
-    open(output_filename, "w") do io
-      # Header
-      print(io, "x ")
-      for label in variables
-        @printf(io, " %-14s", label)
-      end
-      println(io)
-
-      # Data
-      for idx in 1:length(xs)
-        @printf(io, "%+10.8e", xs[idx])
-        for variable_id in 1:length(variables)
-          @printf(io, " %+10.8e ", node_centered_data[idx, variable_id])
+        # Create output directory if it does not exist
+        mkpath(output_directory)
+
+        # Determine output file name
+        base, _ = splitext(splitdir(filename)[2])
+        output_filename = joinpath(output_directory, "$(base).txt")
+
+        # Write to file
+        open(output_filename, "w") do io
+            # Header
+            print(io, "x ")
+            for label in variables
+                @printf(io, " %-14s", label)
+            end
+            println(io)
+
+            # Data
+            for idx in 1:length(xs)
+                @printf(io, "%+10.8e", xs[idx])
+                for variable_id in 1:length(variables)
+                    @printf(io, " %+10.8e ", node_centered_data[idx, variable_id])
+                end
+                println(io)
+            end
         end
-        println(io)
-      end
     end
-  end
-end
 end
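For orientation, a hypothetical invocation of the converter defined above (the file names and options are placeholders; `trixi2txt` accepts one or more glob patterns and is meant to be `include`d into a Julia session first):

# Load the script; if it wraps its functions in a module, qualify the call accordingly
include("utils/trixi2txt.jl")

# Convert all 1D solution files in out/ to .txt files with 4 visualization
# nodes per element, writing the results into `output_directory`
trixi2txt("out/solution_*.h5"; output_directory = "out", nvisnodes = 4)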

-
 # Check if file is a data file
 function is_solution_restart_file(filename::String)
-  # Open file for reading
-  h5open(filename, "r") do file
-    # If attribute "mesh_file" exists, this must be a data file
-    return haskey(attributes(file), "mesh_file")
-  end
+    # Open file for reading
+    h5open(filename, "r") do file
+        # If attribute "mesh_file" exists, this must be a data file
+        return haskey(attributes(file), "mesh_file")
+    end
 end

-
 # Use data file to extract mesh filename from attributes
 function extract_mesh_filename(filename::String)
-  # Open file for reading
-  h5open(filename, "r") do file
-    # Extract filename relative to data file
-    mesh_file = read(attributes(file)["mesh_file"])
+    # Open file for reading
+    h5open(filename, "r") do file
+        # Extract filename relative to data file
+        mesh_file = read(attributes(file)["mesh_file"])

-    return joinpath(dirname(filename), mesh_file)
-  end
+        return joinpath(dirname(filename), mesh_file)
+    end
 end

-
 # Read in mesh file and return relevant data
 function read_meshfile(filename::String)
-  # Open file for reading
-  h5open(filename, "r") do file
-    # Check dimension - only 1D supported
-    if haskey(attributes(file), "ndims")
-      ndims_ = read(attributes(file)["ndims"])
-    else
-      ndims_ = read(attributes(file)["ndim"]) # FIXME once Trixi.jl's 3D branch is merged & released
-    end
-    if ndims_ != 1
-      error("currently only 1D files can be processed, but '$filename' is $(ndims_)D")
-    end
+    # Open file for reading
+    h5open(filename, "r") do file
+        # Check dimension - only 1D supported
+        if haskey(attributes(file), "ndims")
+            ndims_ = read(attributes(file)["ndims"])
+        else
+            ndims_ = read(attributes(file)["ndim"]) # FIXME once Trixi.jl's 3D branch is merged & released
+        end
+        if ndims_ != 1
+            error("currently only 1D files can be processed, but '$filename' is $(ndims_)D")
+        end

-    # Extract basic information
-    n_cells = read(attributes(file)["n_cells"])
-    n_leaf_cells = read(attributes(file)["n_leaf_cells"])
-    center_level_0 = read(attributes(file)["center_level_0"])
-    length_level_0 = read(attributes(file)["length_level_0"])
-
-    # Extract coordinates, levels, child cells
-    coordinates = Array{Float64}(undef, ndims_, n_cells)
-    coordinates .= read(file["coordinates"])
-    levels = Array{Int}(undef, n_cells)
-    levels .= read(file["levels"])
-    child_ids = Array{Int}(undef, 2^ndims_, n_cells)
-    child_ids .= read(file["child_ids"])
-
-    # Extract leaf cells (= cells to be plotted) and contract all other arrays accordingly
-    leaf_cells = similar(levels)
-    n_cells = 0
-    for cell_id in 1:length(levels)
-      if sum(child_ids[:, cell_id]) > 0
-        continue
-      end
-
-      n_cells += 1
-      leaf_cells[n_cells] = cell_id
-    end
-    leaf_cells = leaf_cells[1:n_cells]
+        # Extract basic information
+        n_cells = read(attributes(file)["n_cells"])
+        n_leaf_cells = read(attributes(file)["n_leaf_cells"])
+        center_level_0 = read(attributes(file)["center_level_0"])
+        length_level_0 = read(attributes(file)["length_level_0"])
+
+        # Extract coordinates, levels, child cells
+        coordinates = Array{Float64}(undef, ndims_, n_cells)
+        coordinates .= read(file["coordinates"])
+        levels = Array{Int}(undef, n_cells)
+        levels .= read(file["levels"])
+        child_ids = Array{Int}(undef, 2^ndims_, n_cells)
+        child_ids .= read(file["child_ids"])
+
+        # Extract leaf cells (= cells to be plotted) and contract all other arrays accordingly
+        leaf_cells = similar(levels)
+        n_cells = 0
+        for cell_id in 1:length(levels)
+            if sum(child_ids[:, cell_id]) > 0
+                continue
+            end
+
+            n_cells += 1
+            leaf_cells[n_cells] = cell_id
+        end
+        leaf_cells = leaf_cells[1:n_cells]

-    coordinates = coordinates[:, leaf_cells]
-    levels = levels[leaf_cells]
+        coordinates = coordinates[:, leaf_cells]
+        levels = levels[leaf_cells]

-    return center_level_0, length_level_0, leaf_cells, coordinates, levels
-  end
+        return center_level_0, length_level_0, leaf_cells, coordinates, levels
+    end
 end

-
 # Read in data file and return all relevant information
 function read_datafile(filename::String)
-  # Open file for reading
-  h5open(filename, "r") do file
-    # Extract basic information
-    if haskey(attributes(file), "ndims")
-      ndims_ = read(attributes(file)["ndims"])
-    else
-      ndims_ = read(attributes(file)["ndim"])
-    end
-    if haskey(attributes(file), "polydeg")
-      polydeg = read(attributes(file)["polydeg"])
-    else
-      polydeg = read(attributes(file)["N"])
-    end
-    n_elements = read(attributes(file)["n_elements"])
-    n_variables = read(attributes(file)["n_vars"])
-    time = read(attributes(file)["time"])
-
-    # Extract labels for legend
-    labels = Array{String}(undef, 1, n_variables)
-    for v = 1:n_variables
-      labels[1, v] = read(attributes(file["variables_$v"])["name"])
-    end
+    # Open file for reading
+    h5open(filename, "r") do file
+        # Extract basic information
+        if haskey(attributes(file), "ndims")
+            ndims_ = read(attributes(file)["ndims"])
+        else
+            ndims_ = read(attributes(file)["ndim"])
+        end
+        if haskey(attributes(file), "polydeg")
+            polydeg = read(attributes(file)["polydeg"])
+        else
+            polydeg = read(attributes(file)["N"])
+        end
+        n_elements = read(attributes(file)["n_elements"])
+        n_variables = read(attributes(file)["n_vars"])
+        time = read(attributes(file)["time"])
+
+        # Extract labels for legend
+        labels = Array{String}(undef, 1, n_variables)
+        for v in 1:n_variables
+            labels[1, v] = read(attributes(file["variables_$v"])["name"])
+        end

-    # Extract data arrays
-    n_nodes = polydeg + 1
-
-    if ndims_ == 1
-      data = Array{Float64}(undef, n_nodes, n_elements, n_variables)
-      for v = 1:n_variables
-        vardata = read(file["variables_$v"])
-        @views data[:, :, v][:] .= vardata
-      end
-    else
-      error("Unsupported number of spatial dimensions: ", ndims_)
-    end
+        # Extract data arrays
+        n_nodes = polydeg + 1
+
+        if ndims_ == 1
+            data = Array{Float64}(undef, n_nodes, n_elements, n_variables)
+            for v in 1:n_variables
+                vardata = read(file["variables_$v"])
+                @views data[:, :, v][:] .= vardata
+            end
+        else
+            error("Unsupported number of spatial dimensions: ", ndims_)
+        end

-    # Extract element variable arrays
-    element_variables = Dict{String, Union{Vector{Float64}, Vector{Int}}}()
-    index = 1
-    while haskey(file, "element_variables_$index")
-      varname = read(attributes(file["element_variables_$index"])["name"])
-      element_variables[varname] = read(file["element_variables_$index"])
-      index +=1
-    end
+        # Extract element variable arrays
+        element_variables = Dict{String, Union{Vector{Float64}, Vector{Int}}}()
+        index = 1
+        while haskey(file, "element_variables_$index")
+            varname = read(attributes(file["element_variables_$index"])["name"])
+            element_variables[varname] = read(file["element_variables_$index"])
+            index += 1
+        end

-    return labels, data, n_elements, n_nodes, element_variables, time
-  end
+        return labels, data, n_elements, n_nodes, element_variables, time
+    end
 end

-
 # Interpolate unstructured DG data to structured data (cell-centered)
 function unstructured2structured(unstructured_data::AbstractArray{Float64},
                                  levels::AbstractArray{Int}, resolution::Int,
                                  nvisnodes_per_level::AbstractArray{Int})
-  # Extract data shape information
-  n_nodes_in, n_elements, n_variables = size(unstructured_data)
-
-  # Get node coordinates for DG locations on reference element
-  nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in)
-
-  # Calculate interpolation vandermonde matrices for each level
-  max_level = length(nvisnodes_per_level) - 1
-  vandermonde_per_level = []
-  for l in 0:max_level
-    n_nodes_out = nvisnodes_per_level[l + 1]
-    dx = 2 / n_nodes_out
-    nodes_out = collect(range(-1 + dx/2, 1 - dx/2, length=n_nodes_out))
-    push!(vandermonde_per_level, polynomial_interpolation_matrix(nodes_in, nodes_out))
-  end
-
-  # Create output data structure
-  structured = Array{Float64}(undef, resolution, n_variables)
-
-  # For each variable, interpolate element data and store to global data structure
-  for v in 1:n_variables
-    first = 1
-
-    # Reshape data array for use in interpolate_nodes function
-    @views reshaped_data = reshape(unstructured_data[:, :, v], 1, n_nodes_in, n_elements)
-
-    for element_id in 1:n_elements
-      # Extract level for convenience
-      level = levels[element_id]
-
-      # Determine target indices
-      n_nodes_out = nvisnodes_per_level[level + 1]
-      last = first + (n_nodes_out - 1)
-
-      # Interpolate data
-      vandermonde = vandermonde_per_level[level + 1]
-      @views structured[first:last, v] .= (
-        reshape(multiply_dimensionwise_naive(reshaped_data[:, :, element_id], vandermonde),
-                n_nodes_out))
-
-      # Update first index for next iteration
-      first += n_nodes_out
+    # Extract data shape information
+    n_nodes_in, n_elements, n_variables = size(unstructured_data)
+
+    # Get node coordinates for DG locations on reference element
+    nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in)
+
+    # Calculate interpolation Vandermonde matrices for each level
+    max_level = length(nvisnodes_per_level) - 1
+    vandermonde_per_level = []
+    for l in 0:max_level
+        n_nodes_out = nvisnodes_per_level[l + 1]
+        dx = 2 / n_nodes_out
+        nodes_out = collect(range(-1 + dx / 2, 1 - dx / 2, length = n_nodes_out))
+        push!(vandermonde_per_level, polynomial_interpolation_matrix(nodes_in, nodes_out))
     end
-  end

-  return structured
-end
+    # Create output data structure
+    structured = Array{Float64}(undef, resolution, n_variables)
+
+    # For each variable, interpolate element data and store to global data structure
+    for v in 1:n_variables
+        first = 1
+
+        # Reshape data array for use in interpolate_nodes function
+        @views reshaped_data = reshape(unstructured_data[:, :, v], 1, n_nodes_in,
+                                       n_elements)
+
+        for element_id in 1:n_elements
+            # Extract level for convenience
+            level = levels[element_id]
+
+            # Determine target indices
+            n_nodes_out = nvisnodes_per_level[level + 1]
+            last = first + (n_nodes_out - 1)
+
+            # Interpolate data
+            vandermonde = vandermonde_per_level[level + 1]
+            @views structured[first:last, v] .= (reshape(multiply_dimensionwise_naive(reshaped_data[:,
+                                                                                                    :,
+                                                                                                    element_id],
+                                                                                      vandermonde),
+                                                         n_nodes_out))
+            # Update first index for next iteration
+            first += n_nodes_out
+        end
+    end
+
+    return structured
+end

 # Convert cell-centered values to node-centered values by averaging the two
 # neighboring cell values; boundary values are duplicated (the periodic variant
 # is kept below as commented-out code)
 function cell2node(cell_centered_data::AbstractArray{Float64})
-  # Create temporary data structure to make the averaging algorithm as simple
-  # as possible (by using a ghost layer)
-  tmp = similar(cell_centered_data, size(cell_centered_data) .+ (2, 0))
-
-  # Fill center with original data
-  tmp[2:end-1, :] .= cell_centered_data
-
-  # # Fill sides with opposite data (periodic domain)
-  # # x-direction
-  # tmp[1, :] .= cell_centered_data[end, :]
-  # tmp[end, :] .= cell_centered_data[1, :]
-
-  # Fill sides with duplicate information
-  # x-direction
-  tmp[1, :] .= cell_centered_data[1, :]
-  tmp[end, :] .= cell_centered_data[end, :]
-
-  # Create output data structure
-  resolution_in, n_variables = size(cell_centered_data)
-  resolution_out = resolution_in + 1
-  node_centered_data = Array{Float64}(undef, resolution_out, n_variables)
-
-  # Obtain node-centered value by averaging over neighboring cell-centered values
-  for i in 1:resolution_out
-    node_centered_data[i, :] = (tmp[i, :] + tmp[i+1, :]) / 2
-  end
-
-  return node_centered_data
+    # Create temporary data structure to make the averaging algorithm as simple
+    # as possible (by using a ghost layer)
+    tmp = similar(cell_centered_data, size(cell_centered_data) .+ (2, 0))
+
+    # Fill center with original data
+    tmp[2:(end - 1), :] .= cell_centered_data
+
+    # # Fill sides with opposite data (periodic domain)
+    # # x-direction
+    # tmp[1, :] .= cell_centered_data[end, :]
+    # tmp[end, :] .= cell_centered_data[1, :]
+
+    # Fill sides with duplicate information
+    # x-direction
+    tmp[1, :] .= cell_centered_data[1, :]
+    tmp[end, :] .= cell_centered_data[end, :]
+
+    # Create output data structure
+    resolution_in, n_variables = size(cell_centered_data)
+    resolution_out = resolution_in + 1
+    node_centered_data = Array{Float64}(undef, resolution_out, n_variables)
+
+    # Obtain node-centered value by averaging over neighboring cell-centered values
+    for i in 1:resolution_out
+        node_centered_data[i, :] = (tmp[i, :] + tmp[i + 1, :]) / 2
+    end
+
+    return node_centered_data
 end
 end
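To make the ghost-layer averaging in `cell2node` concrete, here is a minimal standalone sketch of the same scheme (duplicated boundary values, two-neighbor average), independent of the helper above:

cells = [1.0, 3.0, 5.0]
tmp = [cells[1]; cells; cells[end]]  # ghost layer duplicates the boundary values
nodes = [(tmp[i] + tmp[i + 1]) / 2 for i in 1:(length(cells) + 1)]
# -> [1.0, 2.0, 4.0, 5.0]: n cell values become n + 1 node values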